From 9dda03d6be6c94cc803b6189302a8a148c5e4d12 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Fri, 17 Jan 2025 19:47:21 +0000 Subject: [PATCH 01/19] fix gen_prefix (#2630) * switch arg --- lm_eval/api/samplers.py | 10 ++-- lm_eval/api/task.py | 52 ++++++++++--------- lm_eval/tasks/arc/arc_challenge_chat.yaml | 2 +- .../instruct/mmlu/_continuation_template_yaml | 2 +- 4 files changed, 35 insertions(+), 31 deletions(-) diff --git a/lm_eval/api/samplers.py b/lm_eval/api/samplers.py index 21ef7ea1..23c29b2b 100644 --- a/lm_eval/api/samplers.py +++ b/lm_eval/api/samplers.py @@ -71,9 +71,9 @@ class ContextSampler: ) self.docs = self.docs.select(fewshot_indices) - def get_context(self, doc: dict, num_fewshot: int, assistant_prefill: str = None): + def get_context(self, doc: dict, num_fewshot: int, gen_prefix: str = None): # draw an extra fewshot sample if using same split as evaluating on - prefix = assistant_prefill + " " if assistant_prefill else "" + prefix = gen_prefix + " " if gen_prefix else "" n_samples = ( num_fewshot + 1 if self.config.fewshot_split == self.config.test_split @@ -115,10 +115,10 @@ class ContextSampler: doc: dict, num_fewshot: int, fewshot_as_multiturn: bool = False, - assistant_prefill: Optional[str] = None, + gen_prefix: Optional[str] = None, ): # TODO: Do we need any other delimiter - prefix = assistant_prefill + " " if assistant_prefill else "" + prefix = gen_prefix + " " if gen_prefix else "" chat_history = [] # draw an extra fewshot sample if using same split as evaluating on n_samples = ( @@ -163,7 +163,7 @@ class ContextSampler: { "role": "user", "content": self.get_context( - doc, num_fewshot, assistant_prefill=assistant_prefill + doc, num_fewshot, gen_prefix=gen_prefix ), } ) diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index 28d597c2..f14f36e8 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -93,7 +93,7 @@ class TaskConfig(dict): filter_list: Optional[Union[str, list]] 
= None should_decontaminate: bool = False doc_to_decontamination_query: Optional[str] = None - assistant_prefill: Optional[str] = None + gen_prefix: Optional[str] = None metadata: Optional[dict] = ( None # by default, not used in the code. allows for users to pass arbitrary info to tasks ) @@ -371,6 +371,9 @@ class Task(abc.ABC): def doc_to_image(self, doc): raise NotImplementedError + def doc_to_prefix(self, doc): + return "" + def build_all_requests( self, *, @@ -444,7 +447,7 @@ class Task(abc.ABC): apply_chat_template, fewshot_as_multiturn, chat_template, - assistant_prefill=self.config.assistant_prefill, + gen_prefix=self.doc_to_prefix(doc), ) # TODO: we should override self.config.repeats if doing greedy gen so users don't waste time+compute @@ -544,13 +547,7 @@ class Task(abc.ABC): return len(re.split(r"\s+", doc)) @utils.positional_deprecated - def fewshot_context( - self, - doc, - num_fewshot, - rnd=None, - description=None, - ): + def fewshot_context(self, doc, num_fewshot, rnd=None, description=None, **kwargs): """Returns a fewshot context string that is made up of a prepended description (if provided), the `num_fewshot` number of examples, and an appended prompt example. @@ -1006,7 +1003,7 @@ class ConfigurableTask(Task): labeled_examples: List[Dict[str, str]], question: str, fewshot_as_multiturn: bool = False, - assistant_prefill: Optional[str] = None, + gen_prefix: Optional[str] = None, ) -> None: """Adds a target question to the labeled examples list. If fewshot_as_multiturn is True, or labeled_examples is empty, or the last entry is a system turn, appends the question as a new user entry. 
@@ -1022,8 +1019,8 @@ class ConfigurableTask(Task): else: # if fewshot_as_multiturn is True, append as next user entry (last is always assistant) labeled_examples.append({"role": "user", "content": question}) - if assistant_prefill: - labeled_examples.append({"role": "assistant", "content": assistant_prefill}) + if gen_prefix: + labeled_examples.append({"role": "assistant", "content": gen_prefix}) @utils.positional_deprecated def fewshot_context( @@ -1034,7 +1031,7 @@ class ConfigurableTask(Task): apply_chat_template: bool = False, fewshot_as_multiturn: bool = False, chat_template: Optional[Callable] = None, - assistant_prefill: Optional[str] = None, + gen_prefix: Optional[str] = None, ) -> Union[str, List[str]]: """Returns a fewshot context string that is made up of a prepended description (if provided), the `num_fewshot` number of examples, and an appended prompt example. @@ -1081,7 +1078,6 @@ class ConfigurableTask(Task): labeled_examples.append({"role": "system", "content": system_prompt}) else: labeled_examples = system_prompt - # if few-shot - append examples after the system prompt if num_fewshot > 0: if apply_chat_template: @@ -1090,12 +1086,12 @@ class ConfigurableTask(Task): doc, num_fewshot, fewshot_as_multiturn, - assistant_prefill=assistant_prefill, + gen_prefix=gen_prefix, ) ) else: labeled_examples += self.sampler.get_context( - doc, num_fewshot, assistant_prefill=assistant_prefill + doc, num_fewshot, gen_prefix=gen_prefix ) example = self.doc_to_text(doc) @@ -1108,7 +1104,7 @@ class ConfigurableTask(Task): labeled_examples, example, fewshot_as_multiturn, - assistant_prefill=assistant_prefill, + gen_prefix=gen_prefix, ) # for loglikelihood create a list of questions with appended choices elif isinstance(example, list): @@ -1120,13 +1116,13 @@ class ConfigurableTask(Task): chat, ex, fewshot_as_multiturn, - assistant_prefill=assistant_prefill, + gen_prefix=gen_prefix, ) # TODO: append prefill? 
labeled_examples_list.append( chat_template( chat, - add_generation_prompt=False if assistant_prefill else True, + add_generation_prompt=False if gen_prefix else True, ) ) return labeled_examples_list @@ -1138,24 +1134,24 @@ class ConfigurableTask(Task): labeled_examples, choices[example], fewshot_as_multiturn, - assistant_prefill=assistant_prefill, + gen_prefix=gen_prefix, ) else: self.append_target_question( labeled_examples, str(example), fewshot_as_multiturn, - assistant_prefill=assistant_prefill, + gen_prefix=gen_prefix, ) # return lm.apply_chat_template(labeled_examples) return chat_template( labeled_examples, - add_generation_prompt=False if assistant_prefill else True, + add_generation_prompt=False if gen_prefix else True, ) else: prefix = ( - self.config.target_delimiter + assistant_prefill - if assistant_prefill is not None + self.config.target_delimiter + gen_prefix + if gen_prefix is not None else "" ) if self.multiple_input: @@ -1342,6 +1338,14 @@ class ConfigurableTask(Task): else: return None + def doc_to_prefix(self, doc): + if (gen_prefix := self.config.gen_prefix) is not None: + if gen_prefix in self.features: + return doc[gen_prefix] + else: + return utils.apply_template(gen_prefix, doc) + return None + def construct_requests( self, doc: dict, ctx: str, **kwargs ) -> Union[List[Instance], Instance]: diff --git a/lm_eval/tasks/arc/arc_challenge_chat.yaml b/lm_eval/tasks/arc/arc_challenge_chat.yaml index 00089272..014e811c 100644 --- a/lm_eval/tasks/arc/arc_challenge_chat.yaml +++ b/lm_eval/tasks/arc/arc_challenge_chat.yaml @@ -9,7 +9,7 @@ validation_split: validation test_split: test fewshot_split: train doc_to_text: 'Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{question.strip()}}\nA. {{choices.text[0]}}\nB. {{choices.text[1]}}\nC. {{choices.text[2]}}{% if choices.text|length > 3 %}\nD. 
{{choices.text[3]}}{% endif %}\nYour response should end with "The best answer is [the_answer_letter]" where the [the_answer_letter] is one of A, B, C or D.' -assistant_prefill: 'The best answer is' +gen_prefix: 'The best answer is' fewshot_delimiter: "\n\n" doc_to_target: "{{ 'ABCD'[answerKey|int - 1] if answerKey|string in '1234' else answerKey }}" num_fewshot: 0 diff --git a/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml b/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml index db38766a..7afb094b 100644 --- a/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml +++ b/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml @@ -5,7 +5,7 @@ fewshot_split: dev fewshot_config: sampler: first_n doc_to_text: "Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nYour response should end with \"The best answer is [the_answer_letter]\" where the [the_answer_letter] is one of A, B, C or D." 
-assistant_prefill: "The best answer is" +gen_prefix: "The best answer is" doc_to_target: "{{['A.','B.','C.','D.'][answer]}}" num_fewshot: 5 metric_list: -- GitLab From f724be699e8adf7ca8004ea0e519dfac83a06f18 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Sun, 19 Jan 2025 01:08:55 +0000 Subject: [PATCH 02/19] update pre-commit (#2632) * update pre-commit --- .pre-commit-config.yaml | 2 +- lm_eval/api/group.py | 4 +-- lm_eval/api/metrics.py | 6 ++-- lm_eval/api/registry.py | 30 +++++++++---------- lm_eval/api/samplers.py | 6 ++-- lm_eval/decontamination/decontaminate.py | 2 +- lm_eval/filters/selection.py | 6 ++-- lm_eval/filters/transformation.py | 6 ++-- lm_eval/loggers/evaluation_tracker.py | 4 +-- lm_eval/loggers/wandb_logger.py | 4 +-- lm_eval/models/api_models.py | 12 ++++---- lm_eval/models/hf_vlms.py | 12 ++++---- lm_eval/models/huggingface.py | 28 +++++++++-------- lm_eval/models/neuron_optimum.py | 16 +++++----- lm_eval/models/openai_completions.py | 25 ++++++++-------- lm_eval/models/optimum_ipex.py | 6 ++-- lm_eval/models/optimum_lm.py | 6 ++-- lm_eval/models/utils.py | 6 ++-- lm_eval/models/vllm_causallms.py | 12 ++++---- lm_eval/tasks/arabicmmlu/utils.py | 2 +- .../flores_eu/create_yamls_flores_eu.py | 2 +- .../flores_ca/create_yamls_flores_ca.py | 2 +- lm_eval/tasks/csatqa/utils.py | 2 +- .../flores_gl/create_yamls_flores_gl.py | 2 +- lm_eval/tasks/ifeval/instructions.py | 2 +- lm_eval/tasks/ifeval/instructions_util.py | 9 +++--- .../ja_leaderboard_mgsm.py | 6 ++-- .../tasks/leaderboard/ifeval/instructions.py | 2 +- .../leaderboard/ifeval/instructions_util.py | 6 ++-- lm_eval/tasks/leaderboard/musr/utils.py | 4 +-- lm_eval/tasks/lingoly/utils.py | 6 ++-- .../flores_pt/create_yamls_flores_pt.py | 2 +- lm_eval/tasks/score/non_greedy_summarizer.py | 6 ++-- .../flores_es/create_yamls_flores_es.py | 2 +- lm_eval/tasks/squadv2/task.py | 6 ++-- .../tasks/tmlu/default/_generate_configs.py | 3 +- 
lm_eval/utils.py | 6 ++-- scripts/model_comparator.py | 2 +- scripts/zeno_visualize.py | 6 ++-- tests/test_tasks.py | 4 ++- 40 files changed, 138 insertions(+), 137 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index edeef333..3b5da239 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: mixed-line-ending args: [--fix=lf] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.4 + rev: v0.9.2 hooks: # Run the linter. - id: ruff diff --git a/lm_eval/api/group.py b/lm_eval/api/group.py index e258692b..0c60739b 100644 --- a/lm_eval/api/group.py +++ b/lm_eval/api/group.py @@ -112,6 +112,4 @@ class ConfigurableGroup(abc.ABC): return self._config.group def __repr__(self): - return ( - f"ConfigurableGroup(group={self.group}," f"group_alias={self.group_alias})" - ) + return f"ConfigurableGroup(group={self.group},group_alias={self.group_alias})" diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index a8459aa7..56ba231b 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -527,9 +527,9 @@ def pooled_sample_stderr(stderrs: List[float], sizes: List[int]): def combined_sample_stderr(stderrs: List[float], sizes: List[int], metrics=None): - assert ( - metrics is not None - ), "Need to pass a list of each subtask's metric for this stderr aggregation" + assert metrics is not None, ( + "Need to pass a list of each subtask's metric for this stderr aggregation" + ) assert len(stderrs) == len(sizes) and len(sizes) == len(metrics) # See https://github.com/EleutherAI/lm-evaluation-harness/pull/1390 for more documentation. 
diff --git a/lm_eval/api/registry.py b/lm_eval/api/registry.py index f8f28937..6d16639e 100644 --- a/lm_eval/api/registry.py +++ b/lm_eval/api/registry.py @@ -17,13 +17,13 @@ def register_model(*names): def decorate(cls): for name in names: - assert issubclass( - cls, LM - ), f"Model '{name}' ({cls.__name__}) must extend LM class" + assert issubclass(cls, LM), ( + f"Model '{name}' ({cls.__name__}) must extend LM class" + ) - assert ( - name not in MODEL_REGISTRY - ), f"Model named '{name}' conflicts with existing model! Please register with a non-conflicting alias instead." + assert name not in MODEL_REGISTRY, ( + f"Model named '{name}' conflicts with existing model! Please register with a non-conflicting alias instead." + ) MODEL_REGISTRY[name] = cls return cls @@ -48,9 +48,9 @@ func2task_index = {} def register_task(name): def decorate(fn): - assert ( - name not in TASK_REGISTRY - ), f"task named '{name}' conflicts with existing registered task!" + assert name not in TASK_REGISTRY, ( + f"task named '{name}' conflicts with existing registered task!" + ) TASK_REGISTRY[name] = fn ALL_TASKS.add(name) @@ -104,9 +104,9 @@ def register_metric(**args): ]: if key in args: value = args[key] - assert ( - value not in registry - ), f"{key} named '{value}' conflicts with existing registered {key}!" + assert value not in registry, ( + f"{key} named '{value}' conflicts with existing registered {key}!" + ) if key == "metric": registry[name] = fn @@ -140,9 +140,9 @@ def get_metric(name: str, hf_evaluate_metric=False) -> Callable: def register_aggregation(name: str): def decorate(fn): - assert ( - name not in AGGREGATION_REGISTRY - ), f"aggregation named '{name}' conflicts with existing registered aggregation!" + assert name not in AGGREGATION_REGISTRY, ( + f"aggregation named '{name}' conflicts with existing registered aggregation!" 
+ ) AGGREGATION_REGISTRY[name] = fn return fn diff --git a/lm_eval/api/samplers.py b/lm_eval/api/samplers.py index 23c29b2b..3f81dfc6 100644 --- a/lm_eval/api/samplers.py +++ b/lm_eval/api/samplers.py @@ -184,9 +184,9 @@ class FirstNSampler(ContextSampler): Draw the first `n` samples in order from the specified split. Used for tasks with "canonical" ordered fewshot examples, such as MMLU and CMMLU. """ - assert ( - n <= len(self.docs) - ), f"Error: number of fewshot samples requested exceeds the {len(self.docs)} that are available." + assert n <= len(self.docs), ( + f"Error: number of fewshot samples requested exceeds the {len(self.docs)} that are available." + ) return self.docs[:n] diff --git a/lm_eval/decontamination/decontaminate.py b/lm_eval/decontamination/decontaminate.py index 3874eb58..2d1250d3 100644 --- a/lm_eval/decontamination/decontaminate.py +++ b/lm_eval/decontamination/decontaminate.py @@ -151,7 +151,7 @@ def get_train_overlap(docs_by_task_set: dict, ngrams_path: str, limit: int) -> d elapsed = time.perf_counter() - start print(f"Read took {elapsed:0.5f} seconds.") - print(f"Speed: {(os.path.getsize(file)/1000000.0)/elapsed}MB/second") + print(f"Speed: {(os.path.getsize(file) / 1000000.0) / elapsed}MB/second") print(duplicates) diff --git a/lm_eval/filters/selection.py b/lm_eval/filters/selection.py index 6e368b59..8c670ed7 100644 --- a/lm_eval/filters/selection.py +++ b/lm_eval/filters/selection.py @@ -34,9 +34,9 @@ class TakeKFilter(Filter): # need resp to be subscriptable to check below resps = list(resps) # check we have at least k responses per doc, else we can't take the first k - assert ( - len(resps[0]) >= self.k - ), f"Need at least {self.k} responses per doc to take first {self.k}, but got {len(resps[0])} only! Please increase TaskConfig.repeats ." + assert len(resps[0]) >= self.k, ( + f"Need at least {self.k} responses per doc to take first {self.k}, but got {len(resps[0])} only! Please increase TaskConfig.repeats ." 
+ ) return map(lambda r: r[: self.k], resps) diff --git a/lm_eval/filters/transformation.py b/lm_eval/filters/transformation.py index cac1c592..1a3592b6 100644 --- a/lm_eval/filters/transformation.py +++ b/lm_eval/filters/transformation.py @@ -43,9 +43,9 @@ class MapFilter(Filter): """ if mapping_dict is None: mapping_dict = {} - assert isinstance( - mapping_dict, dict - ), "Provided mapping_dict is not a dictionary" + assert isinstance(mapping_dict, dict), ( + "Provided mapping_dict is not a dictionary" + ) self.mapping_dict = mapping_dict self.default_value = default_value diff --git a/lm_eval/loggers/evaluation_tracker.py b/lm_eval/loggers/evaluation_tracker.py index 067b047b..4067c50e 100644 --- a/lm_eval/loggers/evaluation_tracker.py +++ b/lm_eval/loggers/evaluation_tracker.py @@ -488,7 +488,7 @@ class EvaluationTracker: else: dataset_summary += f"{self.general_config_tracker.model_name}\n" dataset_summary += ( - f"The dataset is composed of {len(card_metadata)-1} configuration(s), each one corresponding to one of the evaluated task.\n\n" + f"The dataset is composed of {len(card_metadata) - 1} configuration(s), each one corresponding to one of the evaluated task.\n\n" f"The dataset has been created from {len(results_files)} run(s). Each run can be found as a specific split in each " 'configuration, the split being named using the timestamp of the run.The "train" split is always pointing to the latest results.\n\n' 'An additional configuration "results" store all the aggregated results of the run.\n\n' @@ -501,7 +501,7 @@ class EvaluationTracker: ) dataset_summary += ( "## Latest results\n\n" - f'These are the [latest results from run {latest_datetime}]({last_results_file_path.replace("/resolve/", "/blob/")}) ' + f"These are the [latest results from run {latest_datetime}]({last_results_file_path.replace('/resolve/', '/blob/')}) " "(note that there might be results for other tasks in the repos if successive evals didn't cover the same tasks. 
" 'You find each in the results and the "latest" split for each eval):\n\n' f"```python\n{results_string}\n```" diff --git a/lm_eval/loggers/wandb_logger.py b/lm_eval/loggers/wandb_logger.py index b50ee03c..53a886fc 100644 --- a/lm_eval/loggers/wandb_logger.py +++ b/lm_eval/loggers/wandb_logger.py @@ -225,7 +225,7 @@ class WandbLogger: instance = [x["arguments"][0][0] for x in data] labels = [x["arguments"][0][1] for x in data] resps = [ - f'log probability of continuation is {x["resps"][0][0][0]} ' + f"log probability of continuation is {x['resps'][0][0][0]} " + "\n\n" + "continuation will {} generated with greedy sampling".format( "not be" if not x["resps"][0][0][1] else "be" @@ -233,7 +233,7 @@ class WandbLogger: for x in data ] filtered_resps = [ - f'log probability of continuation is {x["filtered_resps"][0][0]} ' + f"log probability of continuation is {x['filtered_resps'][0][0]} " + "\n\n" + "continuation will {} generated with greedy sampling".format( "not be" if not x["filtered_resps"][0][1] else "be" diff --git a/lm_eval/models/api_models.py b/lm_eval/models/api_models.py index 80678f5c..c24cea95 100644 --- a/lm_eval/models/api_models.py +++ b/lm_eval/models/api_models.py @@ -195,9 +195,9 @@ class TemplateAPI(TemplateLM): """Helper method to transform the prompt into the expected API input format. messages consist of batched requests""" if isinstance(messages[0], JsonChatStr): # for chat completions we need to decode the json string to list[dict,...] - assert ( - self._batch_size == 1 - ), "non-tokenized chat requests are only supported with batch_size=1" + assert self._batch_size == 1, ( + "non-tokenized chat requests are only supported with batch_size=1" + ) # list[dict["role":..., "content":...],...] 
return json.loads(messages[0].prompt) @@ -506,9 +506,9 @@ class TemplateAPI(TemplateLM): return await tqdm_asyncio.gather(*tasks, desc="Requesting API") def _loglikelihood_tokens(self, requests, **kwargs) -> List[Tuple[float, bool]]: - assert ( - self.tokenizer is not None - ), "Tokenizer is required for loglikelihood tasks to compute context lengths." + assert self.tokenizer is not None, ( + "Tokenizer is required for loglikelihood tasks to compute context lengths." + ) res = [] def _collate(req: LogLikelihoodInputs): diff --git a/lm_eval/models/hf_vlms.py b/lm_eval/models/hf_vlms.py index 05584ac0..4e67debe 100644 --- a/lm_eval/models/hf_vlms.py +++ b/lm_eval/models/hf_vlms.py @@ -51,9 +51,9 @@ class HFMultimodalLM(HFLM): # modify init behavior. super().__init__(pretrained, **kwargs) - assert ( - self.batch_size != "auto" - ), "Batch size 'auto' is not yet supported for hf-multimodal models." + assert self.batch_size != "auto", ( + "Batch size 'auto' is not yet supported for hf-multimodal models." + ) self.chat_applied: bool = False # TODO: phi-3.5 "image placeholders" are , , ... in order. how to handle this case @@ -73,9 +73,9 @@ class HFMultimodalLM(HFLM): or getattr(self.config, "image_token_index", None) ) ) - assert ( - self.image_token_id is not None - ), "Must have a non-None image_token_id to evaluate a Hugging Face AutoModelForVision2Seq model. Please pass `image_token_id` in `--model_args` if model's config does not already specify one." + assert self.image_token_id is not None, ( + "Must have a non-None image_token_id to evaluate a Hugging Face AutoModelForVision2Seq model. Please pass `image_token_id` in `--model_args` if model's config does not already specify one." 
+ ) # get the string this token ID corresponds to self.image_token = self.tok_decode( [self.image_token_id], skip_special_tokens=False diff --git a/lm_eval/models/huggingface.py b/lm_eval/models/huggingface.py index 961b7b4b..919d505a 100644 --- a/lm_eval/models/huggingface.py +++ b/lm_eval/models/huggingface.py @@ -99,7 +99,9 @@ class HFLM(TemplateLM): eval_logger.warning( "`pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way." ) - assert not parallelize, "`parallelize=True` is not compatible with passing pre-initialized model to `pretrained`" + assert not parallelize, ( + "`parallelize=True` is not compatible with passing pre-initialized model to `pretrained`" + ) self._model = pretrained self._device = self._model.device self._config = self._model.config @@ -571,9 +573,9 @@ class HFLM(TemplateLM): if not autogptq and not gptqmodel: if model_kwargs.get("load_in_4bit", None): - assert ( - transformers.__version__ >= "4.30.0" - ), "load_in_4bit requires transformers >= 4.30.0" + assert transformers.__version__ >= "4.30.0", ( + "load_in_4bit requires transformers >= 4.30.0" + ) if transformers.__version__ >= "4.30.0": if model_kwargs.get("load_in_4bit", None): if model_kwargs.get("bnb_4bit_compute_dtype", None): @@ -905,16 +907,16 @@ class HFLM(TemplateLM): self, logits: torch.Tensor, contlen: int = None, inplen: int = None ) -> torch.Tensor: if self.backend == "causal": - assert ( - contlen and inplen - ), "Must pass input len and cont. len to select scored logits for causal LM" + assert contlen and inplen, ( + "Must pass input len and cont. len to select scored logits for causal LM" + ) # discard right-padding. # also discard the input/context tokens. we'll only score continuations. 
logits = logits[inplen - contlen : inplen] elif self.backend == "seq2seq": - assert ( - contlen and not inplen - ), "Selecting scored logits for Seq2SeqLM requires only cont. len" + assert contlen and not inplen, ( + "Selecting scored logits for Seq2SeqLM requires only cont. len" + ) # only discard right-padding. # the logits input to this fn only contain decoder-side tokens. logits = logits[:contlen] @@ -1329,9 +1331,9 @@ class HFLM(TemplateLM): if self.backend == "causal": # max len for inputs = max length, minus room to generate the max new tokens max_ctx_len = self.max_length - max_gen_toks - assert ( - max_ctx_len > 0 - ), f"Invalid configuration: requested max tokens to generate ({max_gen_toks}) must be less than model's maximum sequence length ({self.max_length})." + assert max_ctx_len > 0, ( + f"Invalid configuration: requested max tokens to generate ({max_gen_toks}) must be less than model's maximum sequence length ({self.max_length})." + ) elif self.backend == "seq2seq": # max len for inputs = encoder's whole max_length max_ctx_len = self.max_length diff --git a/lm_eval/models/neuron_optimum.py b/lm_eval/models/neuron_optimum.py index ca2aaf65..2f3aa929 100644 --- a/lm_eval/models/neuron_optimum.py +++ b/lm_eval/models/neuron_optimum.py @@ -206,7 +206,7 @@ class NEURON_HF(TemplateLM): "Only float16/bfloat16/float32 are supported." ) - print(f"{'='*20} \n exporting model to neuron") + print(f"{'=' * 20} \n exporting model to neuron") self.model = CustomNeuronModelForCausalLM.from_pretrained( pretrained, revision=revision, @@ -220,19 +220,17 @@ class NEURON_HF(TemplateLM): ) neuron_config = self.model.config.neuron print( - f"SUCCESS: neuron model exported with config {neuron_config}. \n {'='*20}" + f"SUCCESS: neuron model exported with config {neuron_config}. \n {'=' * 20}" ) else: - print( - f"{'='*20} \n loading neuron model with config" f" {neuron_config}..." 
- ) + print(f"{'=' * 20} \n loading neuron model with config {neuron_config}...") self.model = CustomNeuronModelForCausalLM.from_pretrained( pretrained, revision=revision, trust_remote_code=trust_remote_code, low_cpu_mem_usage=low_cpu_mem_usage, ) - print(f"SUCCESS: neuron model loaded. \n {'='*20}") + print(f"SUCCESS: neuron model loaded. \n {'=' * 20}") self.truncation = truncation @@ -353,9 +351,9 @@ class NEURON_HF(TemplateLM): ) def _select_cont_toks(self, logits, contlen=None, inplen=None): - assert ( - contlen and inplen - ), "Must pass input len and cont. len to select scored logits for causal LM" + assert contlen and inplen, ( + "Must pass input len and cont. len to select scored logits for causal LM" + ) # discard right-padding. # also discard the input/context tokens. we'll only score continuations. logits = logits[inplen - contlen : inplen] diff --git a/lm_eval/models/openai_completions.py b/lm_eval/models/openai_completions.py index 223fa236..1afc0f6a 100644 --- a/lm_eval/models/openai_completions.py +++ b/lm_eval/models/openai_completions.py @@ -134,9 +134,9 @@ class LocalChatCompletion(LocalCompletionsAPI): eos=None, **kwargs, ) -> dict: - assert ( - type(messages) is not str - ), "chat-completions require the --apply_chat_template flag." + assert type(messages) is not str, ( + "chat-completions require the --apply_chat_template flag." + ) gen_kwargs.pop("do_sample", False) if "max_tokens" in gen_kwargs: max_tokens = gen_kwargs.pop("max_tokens") @@ -208,13 +208,12 @@ class OpenAICompletionsAPI(LocalCompletionsAPI): return key def loglikelihood(self, requests, **kwargs): - assert ( - self.model - in [ - "babbage-002", - "davinci-002", - ] - ), f"Prompt loglikelihoods are only supported by OpenAI's API for {['babbage-002', 'davinci-002']}." + assert self.model in [ + "babbage-002", + "davinci-002", + ], ( + f"Prompt loglikelihoods are only supported by OpenAI's API for {['babbage-002', 'davinci-002']}." 
+ ) return super().loglikelihood(requests, **kwargs) def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]: @@ -265,9 +264,9 @@ class OpenAIChatCompletion(LocalChatCompletion): eos="<|endoftext|>", **kwargs, ) -> dict: - assert ( - type(messages) is not str - ), "chat-completions require the --apply_chat_template flag." + assert type(messages) is not str, ( + "chat-completions require the --apply_chat_template flag." + ) gen_kwargs.pop("do_sample", False) if "max_tokens" in gen_kwargs: max_tokens = gen_kwargs.pop("max_tokens") diff --git a/lm_eval/models/optimum_ipex.py b/lm_eval/models/optimum_ipex.py index 56776da1..68d38528 100644 --- a/lm_eval/models/optimum_ipex.py +++ b/lm_eval/models/optimum_ipex.py @@ -21,9 +21,9 @@ class IPEXLM(HFLM): ) -> None: if "backend" in kwargs: # currently only supports causal models - assert ( - kwargs["backend"] == "causal" - ), "Currently, only IPEXModelForCausalLM is supported." + assert kwargs["backend"] == "causal", ( + "Currently, only IPEXModelForCausalLM is supported." + ) super().__init__( backend=kwargs.pop("backend", "causal"), diff --git a/lm_eval/models/optimum_lm.py b/lm_eval/models/optimum_lm.py index b13b321f..de5e2460 100644 --- a/lm_eval/models/optimum_lm.py +++ b/lm_eval/models/optimum_lm.py @@ -29,9 +29,9 @@ class OptimumLM(HFLM): ) -> None: if "backend" in kwargs: # optimum currently only supports causal models - assert ( - kwargs["backend"] == "causal" - ), "Currently, only OVModelForCausalLM is supported." + assert kwargs["backend"] == "causal", ( + "Currently, only OVModelForCausalLM is supported." + ) self.openvino_device = device diff --git a/lm_eval/models/utils.py b/lm_eval/models/utils.py index e7c28c3e..8d672c12 100644 --- a/lm_eval/models/utils.py +++ b/lm_eval/models/utils.py @@ -155,9 +155,9 @@ def pad_and_concat( length in the batch. Used for batching inputs and continuations in seq2seq models. 
""" - assert ( - padding_side == "left" or padding_side == "right" - ), f"Unrecognized padding type: '{padding_side}' not 'left' or 'right'" + assert padding_side == "left" or padding_side == "right", ( + f"Unrecognized padding type: '{padding_side}' not 'left' or 'right'" + ) for i, tensor in enumerate(tensors): if len(tensor.shape) == 2: diff --git a/lm_eval/models/vllm_causallms.py b/lm_eval/models/vllm_causallms.py index 513a137b..5718cb5d 100644 --- a/lm_eval/models/vllm_causallms.py +++ b/lm_eval/models/vllm_causallms.py @@ -76,9 +76,9 @@ class VLLM(TemplateLM): ) assert "cuda" in device or device is None, "vLLM only supports CUDA" - assert ( - max_length is None or max_model_len is None - ), "Either max_length or max_model_len may be provided, but not both" + assert max_length is None or max_model_len is None, ( + "Either max_length or max_model_len may be provided, but not both" + ) self._max_length = max_model_len if max_model_len is not None else max_length self.tensor_parallel_size = int(tensor_parallel_size) @@ -142,9 +142,9 @@ class VLLM(TemplateLM): self._max_gen_toks = max_gen_toks if lora_local_path is not None: - assert parse_version(version("vllm")) > parse_version( - "0.3.0" - ), "lora adapters only compatible with vllm > v0.3.0." + assert parse_version(version("vllm")) > parse_version("0.3.0"), ( + "lora adapters only compatible with vllm > v0.3.0." 
+ ) self.lora_request = LoRARequest("finetuned", 1, lora_local_path) else: self.lora_request = None diff --git a/lm_eval/tasks/arabicmmlu/utils.py b/lm_eval/tasks/arabicmmlu/utils.py index e1ed4b99..2c476131 100644 --- a/lm_eval/tasks/arabicmmlu/utils.py +++ b/lm_eval/tasks/arabicmmlu/utils.py @@ -41,4 +41,4 @@ def doc_to_text(doc): def doc_to_choice(doc): - return [alpa[i][0] for i in range(5) if doc[f"Option {i+1}"]] + return [alpa[i][0] for i in range(5) if doc[f"Option {i + 1}"]] diff --git a/lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py b/lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py index 723edc51..52c2afb1 100644 --- a/lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py +++ b/lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py @@ -258,7 +258,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py b/lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py index 6125b972..c8f3e559 100644 --- a/lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py +++ b/lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py @@ -259,7 +259,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/csatqa/utils.py b/lm_eval/tasks/csatqa/utils.py index 253bc1b6..485a724c 100644 --- a/lm_eval/tasks/csatqa/utils.py +++ b/lm_eval/tasks/csatqa/utils.py @@ -7,7 +7,7 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: ### Context: {doc["context"]} ### Question: {doc["question"]} ### Options: -(1) 
{doc['option#1']}\n(2) {doc["option#2"]}\n(3) {doc["option#3"]}\n(4) {doc['option#4']}\n(5) {doc['option#5']} +(1) {doc["option#1"]}\n(2) {doc["option#2"]}\n(3) {doc["option#3"]}\n(4) {doc["option#4"]}\n(5) {doc["option#5"]} ### Answer: 주어진 문제의 정답은""" out_doc = { diff --git a/lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py b/lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py index c98b9b21..04787817 100644 --- a/lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py +++ b/lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py @@ -258,7 +258,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/ifeval/instructions.py b/lm_eval/tasks/ifeval/instructions.py index a79cbba4..9a7bcce1 100644 --- a/lm_eval/tasks/ifeval/instructions.py +++ b/lm_eval/tasks/ifeval/instructions.py @@ -722,7 +722,7 @@ class RephraseChecker(Instruction): if not self.is_change(value): raise ValueError( - f"value {value} does not contain " "changes in the form of *change me*." + f"value {value} does not contain changes in the form of *change me*." ) response_without_changes = self.strip_changes(value) diff --git a/lm_eval/tasks/ifeval/instructions_util.py b/lm_eval/tasks/ifeval/instructions_util.py index df58fb30..33e0a0a0 100644 --- a/lm_eval/tasks/ifeval/instructions_util.py +++ b/lm_eval/tasks/ifeval/instructions_util.py @@ -35,10 +35,11 @@ RANK = os.environ.get("LOCAL_RANK", "0") def download_nltk_resources(): """Download 'punkt' if not already installed""" - assert ( - (nltk_version := parse_version(version("nltk"))) - >= parse_version(NLTK_MIN_VERSION) - ), f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. 
Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability." + assert (nltk_version := parse_version(version("nltk"))) >= parse_version( + NLTK_MIN_VERSION + ), ( + f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability." + ) try: nltk.data.find("tokenizers/punkt_tab") diff --git a/lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py b/lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py index 28f270b5..0d122c7a 100644 --- a/lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py +++ b/lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py @@ -23,9 +23,9 @@ def _extract_answer(completion): def process_results(doc, results): - assert ( - len(results) == 1 - ), f"results should be a list with 1 str element, but is {results}" + assert len(results) == 1, ( + f"results should be a list with 1 str element, but is {results}" + ) completion = results[0] extracted_answer = _extract_answer(completion) diff --git a/lm_eval/tasks/leaderboard/ifeval/instructions.py b/lm_eval/tasks/leaderboard/ifeval/instructions.py index a79cbba4..9a7bcce1 100644 --- a/lm_eval/tasks/leaderboard/ifeval/instructions.py +++ b/lm_eval/tasks/leaderboard/ifeval/instructions.py @@ -722,7 +722,7 @@ class RephraseChecker(Instruction): if not self.is_change(value): raise ValueError( - f"value {value} does not contain " "changes in the form of *change me*." + f"value {value} does not contain changes in the form of *change me*." 
) response_without_changes = self.strip_changes(value) diff --git a/lm_eval/tasks/leaderboard/ifeval/instructions_util.py b/lm_eval/tasks/leaderboard/ifeval/instructions_util.py index 9ca2d4de..6993e418 100644 --- a/lm_eval/tasks/leaderboard/ifeval/instructions_util.py +++ b/lm_eval/tasks/leaderboard/ifeval/instructions_util.py @@ -34,9 +34,9 @@ NLTK_MIN_VERSION = "3.9.1" def download_nltk_resources(): """Download 'punkt' if not already installed""" nltk_version = pkg_resources.get_distribution("nltk").version - assert ( - version.parse(nltk_version) >= version.parse(NLTK_MIN_VERSION) - ), f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability." + assert version.parse(nltk_version) >= version.parse(NLTK_MIN_VERSION), ( + f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability." 
+ ) try: nltk.data.find("tokenizers/punkt_tab") diff --git a/lm_eval/tasks/leaderboard/musr/utils.py b/lm_eval/tasks/leaderboard/musr/utils.py index 1d0a7d1c..eb17a529 100644 --- a/lm_eval/tasks/leaderboard/musr/utils.py +++ b/lm_eval/tasks/leaderboard/musr/utils.py @@ -8,7 +8,7 @@ def doc_to_choice(doc): return ast.literal_eval(doc["choices"]) -DOC_TO_TEXT = "{narrative}\n\n" "{question}\n\n" "{choices}\n" "Answer:" +DOC_TO_TEXT = "{narrative}\n\n{question}\n\n{choices}\nAnswer:" def doc_to_text(doc): @@ -17,7 +17,7 @@ def doc_to_text(doc): """ choices = "" for i, choice in enumerate(ast.literal_eval(doc["choices"])): - choices += f"{i+1} - {choice}\n" + choices += f"{i + 1} - {choice}\n" text = DOC_TO_TEXT.format( narrative=doc["narrative"], question=doc["question"], choices=choices diff --git a/lm_eval/tasks/lingoly/utils.py b/lm_eval/tasks/lingoly/utils.py index 21051d77..b4044228 100644 --- a/lm_eval/tasks/lingoly/utils.py +++ b/lm_eval/tasks/lingoly/utils.py @@ -14,13 +14,13 @@ def load_questionsheet(qsheet: dict, no_context: bool = False): all_subquestions += "\n" if no_context: - prompt = f"""{qsheet['preamble']} + prompt = f"""{qsheet["preamble"]} {all_subquestions} """ else: - prompt = f"""{qsheet['preamble']} - {qsheet['context']} + prompt = f"""{qsheet["preamble"]} + {qsheet["context"]} {all_subquestions} """ diff --git a/lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py b/lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py index 677e6bb4..a185c744 100644 --- a/lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py +++ b/lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py @@ -258,7 +258,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/score/non_greedy_summarizer.py 
b/lm_eval/tasks/score/non_greedy_summarizer.py index 9a927288..a7b78a9e 100644 --- a/lm_eval/tasks/score/non_greedy_summarizer.py +++ b/lm_eval/tasks/score/non_greedy_summarizer.py @@ -127,9 +127,9 @@ def main(): for seed in range(1, N_SEEDS + 1): # Checking if directories exist seed_log_dir = os.path.join(args.log_dir, f"seed_{seed}") - assert os.path.exists( - seed_log_dir - ), f"No logs found for seed={seed}. No directory found at {seed_log_dir}" + assert os.path.exists(seed_log_dir), ( + f"No logs found for seed={seed}. No directory found at {seed_log_dir}" + ) subtasks = None if args.dataset == "agieval": agieval_subtasks = [ diff --git a/lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py b/lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py index bf4d49d2..709a3675 100644 --- a/lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py +++ b/lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py @@ -258,7 +258,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/squadv2/task.py b/lm_eval/tasks/squadv2/task.py index 184a5978..5a77cb5f 100644 --- a/lm_eval/tasks/squadv2/task.py +++ b/lm_eval/tasks/squadv2/task.py @@ -58,9 +58,9 @@ class SQuAD2(ConfigurableTask): super().__init__(config={"metadata": {"version": self.VERSION}}) # HF changed squad on us so we have to make sure we aren't running the old one - assert version.parse(datasets.__version__) >= version.parse( - "1.11.0" - ), "datasets v1.11.0 or later required for SQuAD" + assert version.parse(datasets.__version__) >= version.parse("1.11.0"), ( + "datasets v1.11.0 or later required for SQuAD" + ) def has_training_docs(self): return True diff --git a/lm_eval/tasks/tmlu/default/_generate_configs.py b/lm_eval/tasks/tmlu/default/_generate_configs.py 
index 86b17608..79e2175d 100644 --- a/lm_eval/tasks/tmlu/default/_generate_configs.py +++ b/lm_eval/tasks/tmlu/default/_generate_configs.py @@ -14,7 +14,8 @@ categories = { "STEM": [ "biology", "chemistry", - "mathematics" "physics", + "mathematics", + "physics", "earth science", ], "humanities": ["Chinese", "history", "Tour", "law"], diff --git a/lm_eval/utils.py b/lm_eval/utils.py index 537a4a25..18c7057f 100644 --- a/lm_eval/utils.py +++ b/lm_eval/utils.py @@ -48,9 +48,9 @@ def escaped_split(text, sep_char, maxsplit=-1): is not specified or less than 0, then there is no limit on the number of splits (all possible splits are made). """ - assert ( - len(sep_char) == 1 - ), "separation string must be a single character for escaped splitting" + assert len(sep_char) == 1, ( + "separation string must be a single character for escaped splitting" + ) if maxsplit == 0: return text diff --git a/scripts/model_comparator.py b/scripts/model_comparator.py index 55f4f3b1..ae211824 100644 --- a/scripts/model_comparator.py +++ b/scripts/model_comparator.py @@ -17,7 +17,7 @@ eval_logger = utils.eval_logger def memory_stats(): eval_logger.info( - f"Memory allocated: {torch.cuda.memory_allocated() / 1024 ** 2}, reserved: {torch.cuda.memory_reserved() // 1024 ** 2}" + f"Memory allocated: {torch.cuda.memory_allocated() / 1024**2}, reserved: {torch.cuda.memory_reserved() // 1024**2}" ) diff --git a/scripts/zeno_visualize.py b/scripts/zeno_visualize.py index 362041c4..1668471c 100644 --- a/scripts/zeno_visualize.py +++ b/scripts/zeno_visualize.py @@ -66,9 +66,9 @@ def main(): f"All models must have the same tasks. {model} has tasks: {model_tasks} but have already recorded tasks: {old_tasks}. Taking intersection {tasks}" ) - assert ( - len(tasks) > 0 - ), "Must provide at least one task in common amongst models to compare." + assert len(tasks) > 0, ( + "Must provide at least one task in common amongst models to compare." 
+ ) for task in tasks: # Upload data for all models diff --git a/tests/test_tasks.py b/tests/test_tasks.py index fc9bb59d..b70bb81f 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -87,7 +87,9 @@ class TestNewTasks: (x[-1].isspace() is False if len(x) > 0 else True) if target_delimiter.isspace() else True - ), "doc_to_text ends in a whitespace and target delimiter also a whitespace" + ), ( + "doc_to_text ends in a whitespace and target delimiter also a whitespace" + ) else: pass -- GitLab From a5c344cf5c48ef70ce7a2edc311e66bdaf7a1ed8 Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Tue, 21 Jan 2025 05:38:38 +0900 Subject: [PATCH 03/19] add hrm8k benchmark for both Korean and English (#2627) * add hrm8k benchmark for both Korean and English * apply precommit * revise tasks to make models not to directly answer; use zeroshot_cot if possible * add README * Add hrm8k on the task-list --------- Co-authored-by: Baber --- lm_eval/tasks/README.md | 259 ++++++++-------- lm_eval/tasks/hrm8k/README.md | 46 +++ lm_eval/tasks/hrm8k/default/_hrm8k_yaml | 22 ++ lm_eval/tasks/hrm8k/default/hrm8k.yaml | 13 + lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml | 3 + lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml | 3 + lm_eval/tasks/hrm8k/default/hrm8k_math.yaml | 3 + lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml | 4 + .../tasks/hrm8k/default/hrm8k_omni_math.yaml | 3 + lm_eval/tasks/hrm8k/default/utils.py | 285 ++++++++++++++++++ lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml | 22 ++ lm_eval/tasks/hrm8k/en/hrm8k_en.yaml | 13 + lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml | 3 + lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml | 3 + lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml | 3 + lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml | 4 + .../tasks/hrm8k/en/hrm8k_omni_math_en.yaml | 3 + lm_eval/tasks/hrm8k/en/utils.py | 285 ++++++++++++++++++ 18 files changed, 848 insertions(+), 129 deletions(-) create mode 100644 lm_eval/tasks/hrm8k/README.md create mode 100644 lm_eval/tasks/hrm8k/default/_hrm8k_yaml create mode 
100644 lm_eval/tasks/hrm8k/default/hrm8k.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_math.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml create mode 100644 lm_eval/tasks/hrm8k/default/utils.py create mode 100644 lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 8a9363a9..c92043bc 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -5,135 +5,136 @@ For more information, including a full list of task names and their precise meanings or sources, follow the links provided to the individual README.md files for each subfolder. -| Task Family | Description | Language(s) | -|-------------|-------------|-------------| -| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | -| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | -| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | -| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. 
| English | -| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | -| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | -| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. | Arabic | -| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | -| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | -| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | -| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | -| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | -| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | -| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | -| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. 
| English, German | -| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | -| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | -| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. | English, Basque, Basque (MT) | -| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | -| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | -| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | -| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. | Chinese | -| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | -| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | -| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | -| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | -| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | -| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | -| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | -| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. 
| English | -| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | -| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | -| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | -| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | -| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | -| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | -| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | -| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French| -| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | -| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | -| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | -| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | -| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | -| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | -| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. 
| Spanish, English | -| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | -| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | -| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. | English | -| [humaneval](humaneval/README.md) | Code generation task that measure functional correctness for synthesizing programs from docstrings. | Python | -| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | -| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | -| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | -| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | -| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | -| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | -| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | -| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | -| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | -| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. 
| German, English, Spanish, French, Italian | -| [lambada_multilingual_stablelm](lambada_multilingual_stablelm/README.md) | Multilingual LAMBADA dataset. Users should prefer evaluating on this version of the multilingual dataset instead of on `lambada_multilingual`. | German, English, Spanish, French, Italian, Dutch, Portuguese | -| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time | English | -| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization | English, Multilingual | -| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | -| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | -| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | +| Task Family | Description | Language(s) | +|-------------|-------------|-------------------------------------------------------------------------------------------------------------------------------| +| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | +| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | +| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | +| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. 
| English | +| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. | Arabic | +| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | +| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | +| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | +| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | +| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | +| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | +| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | +| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. 
| English, German | +| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | +| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | +| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. | English, Basque, Basque (MT) | +| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | +| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | +| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | +| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. | Chinese | +| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | +| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | +| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | +| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | +| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | +| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | +| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | +| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. 
| English | +| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | +| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | +| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | +| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | +| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | +| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | +| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | +| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French | +| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | +| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | +| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | +| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | +| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | +| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | +| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. 
| Spanish, English | +| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | +| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | +| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. | English | +| [hrm8k](hrm8k/README.md) | A challenging bilingual math reasoning benchmark for Korean and English. | Korean (Some MT), English (Some MT) | +| [humaneval](humaneval/README.md) | Code generation task that measure functional correctness for synthesizing programs from docstrings. | Python | +| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | +| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | +| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | +| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | +| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | +| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | +| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | +| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | +| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | +| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. 
This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | +| [lambada_multilingual_stablelm](lambada_multilingual_stablelm/README.md) | Multilingual LAMBADA dataset. Users should prefer evaluating on this version of the multilingual dataset instead of on `lambada_multilingual`. | German, English, Spanish, French, Italian, Dutch, Portuguese | +| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time | English | +| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization | English, Multilingual | +| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | +| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | +| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | | [mbpp](mbpp/README.md) | A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions. | Python | -| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | -| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept. | English | -| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | -| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. 
| English | -| medqa | Multiple choice question answering based on the United States Medical License Exams. | | -| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. | Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | -| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | +| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | +| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept. | English | +| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | +| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. | English | +| medqa | Multiple choice question answering based on the United States Medical License Exams. | | +| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. | Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | +| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | | [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | -| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | -| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. 
| English | -| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | -| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | -| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | -| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | -| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | -| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | -| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (34 languages) **Machine Translated.** | -| [okapi/truthfulqa_multilingual](okapi/truthfulqa_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | -| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | -| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | -| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | -| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. 
| English | -| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. | English | -| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | -| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | -| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | -| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | -| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | -| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | -| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | -| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. | English | -| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | -| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | -| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH) | English | -| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | -| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | -| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. 
| Spanish | -| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. | English | -| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | -| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | -| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | -| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | -| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | -| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | -| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | -| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. | English | -| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | -| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | -| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | -| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. 
| Turkish | -| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. | English | -| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | -| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | -| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | -| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | -| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | -| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | -| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | -| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | +| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | +| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | +| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. 
| English | +| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | +| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | +| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | +| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | +| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | +| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (34 languages) **Machine Translated.** | +| [okapi/truthfulqa_multilingual](okapi/truthfulqa_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | +| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | +| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | +| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | +| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | +| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. 
| English | +| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | +| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | +| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | +| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | +| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | +| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | +| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | +| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. | English | +| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | +| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | +| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH) | English | +| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | +| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | +| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | +| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. 
| English | +| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | +| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | +| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | +| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | +| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | +| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | +| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | +| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. | English | +| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | +| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | +| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | +| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | +| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. 
| English | +| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | +| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | +| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | +| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | +| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | +| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | +| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | +| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | | [xnli](xnli/README.md) | Cross-Lingual Natural Language Inference to test understanding across different languages. | Arabic, Bulgarian, German, Greek, English, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese, Chinese | -| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | -| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | -| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. 
| Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | -| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | +| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | +| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | +| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. | Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | +| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | diff --git a/lm_eval/tasks/hrm8k/README.md b/lm_eval/tasks/hrm8k/README.md new file mode 100644 index 00000000..cd5a1739 --- /dev/null +++ b/lm_eval/tasks/hrm8k/README.md @@ -0,0 +1,46 @@ +# HRM8K + +### Paper + +Title: [Understand, Solve and Translate: Bridging the Multilingual Mathematical Reasoning Gap](https://www.arxiv.org/abs/2501.02448) + +Large language models (LLMs) demonstrate exceptional performance on complex reasoning tasks. However, despite their strong reasoning capabilities in high-resource languages (e.g., English and Chinese), a significant performance gap persists in other languages. To investigate this gap in Korean, we introduce HRM8K, a benchmark comprising 8,011 English-Korean parallel bilingual math problems. Through systematic analysis of model behaviors, we identify a key finding: these performance disparities stem primarily from difficulties in comprehending non-English inputs, rather than limitations in reasoning capabilities. 
Based on these findings, we propose UST (Understand, Solve, and Translate), a method that strategically uses English as an anchor for reasoning and solution generation. By fine-tuning the model on 130k synthetically generated data points, UST achieves a 10.91% improvement on the HRM8K benchmark and reduces the multilingual performance gap from 11.6% to 0.7%. Additionally, we show that improvements from UST generalize effectively to different Korean domains, demonstrating that capabilities acquired from machine-verifiable content can be generalized to other areas. We publicly release the benchmark, training dataset, and models. + +Homepage: https://huggingface.co/datasets/HAERAE-HUB/HRM8K + + +### Citation + +``` +@article{ko2025understand, + title={Understand, Solve and Translate: Bridging the Multilingual Mathematical Reasoning Gap}, + author={Ko, Hyunwoo and Son, Guijin and Choi, Dasol}, + journal={arXiv preprint arXiv:2501.02448}, + year={2025} +} +``` + +### Groups and Tasks + +#### Groups + +* `hrm8k`: HRM8K comprises 8,011 instances for evaluation, sourced through a combination of translations from established English benchmarks (e.g., GSM8K, MATH, OmniMath, MMMLU) and original problems curated from existing Korean math exams. This benchmark consists of Korean instructions and questions. +* `hrm8k_en`: English version of `hrm8k`. This benchmark consists of English instructions and questions. + +#### Tasks + +* `hrm8k_{gsm8k|ksm|math|mmmlu|omni_math}` +* `hrm8k_{gsm8k|ksm|math|mmmlu|omni_math}_en` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [x] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
+ + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? diff --git a/lm_eval/tasks/hrm8k/default/_hrm8k_yaml b/lm_eval/tasks/hrm8k/default/_hrm8k_yaml new file mode 100644 index 00000000..18c53d22 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/_hrm8k_yaml @@ -0,0 +1,22 @@ +dataset_path: HAERAE-HUB/HRM8K +output_type: generate_until +test_split: test +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +process_results: !function utils.process_results +num_fewshot: 0 +generation_kwargs: + until: + - "</s>" + - "<|end_of_text|>" + - "<|endoftext|>" + - "<|im_end|>" + max_gen_toks: 512 + do_sample: false + temperature: 0 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/default/hrm8k.yaml b/lm_eval/tasks/hrm8k/default/hrm8k.yaml new file mode 100644 index 00000000..cc9753f6 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k.yaml @@ -0,0 +1,13 @@ +group: hrm8k +task: + - hrm8k_gsm8k + - hrm8k_ksm + - hrm8k_math + - hrm8k_mmmlu + - hrm8k_omni_math +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml new file mode 100644 index 00000000..a46ff5a0 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: GSM8K +task: hrm8k_gsm8k diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml new file mode 100644 index 00000000..3c1f7ac2 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml
+dataset_name: KSM +task: hrm8k_ksm diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml new file mode 100644 index 00000000..ecdf67cf --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: MATH +task: hrm8k_math diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml new file mode 100644 index 00000000..20faaaf1 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml @@ -0,0 +1,4 @@ +include: _hrm8k_yaml +dataset_name: MMMLU +task: hrm8k_mmmlu +doc_to_text: !function utils.doc_to_text_mmmlu diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml new file mode 100644 index 00000000..c2dadac2 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: OMNI_MATH +task: hrm8k_omni_math diff --git a/lm_eval/tasks/hrm8k/default/utils.py b/lm_eval/tasks/hrm8k/default/utils.py new file mode 100644 index 00000000..aaeecd14 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/utils.py @@ -0,0 +1,285 @@ +import re +from typing import Dict, List + + +def doc_to_text(doc): + text = ( + "주어진 문제를 풀어보세요.\n" + "문제를 푼 후, 최종 답변을 다음과 같은 형식으로 작성하세요: $\\boxed{N}$.\n\n" + f"문제: {doc['question'].strip()}\n답변:" + ) + return text + + +def doc_to_text_mmmlu(doc): + text = ( + "주어진 문제를 풀어보세요.\n" + "문제를 푼 후, 주어진 선택지 (1, 2, 3, 4) 중 최종 선택지를 다음 형식으로 작성하세요: $\\boxed{N}$.\n\n" + f"문제: {doc['question'].strip()}\n답변:" + ) + return text + + +def doc_to_target(doc): + return postprocess(doc["answer"]) + + +def postprocess(s): + s = str(s).strip() + try: + float_value = float(s) + return str(int(float_value)) if float_value.is_integer() else str(float_value) + except Exception: + return s + + +def process_results(doc: dict, results: List[str]) -> Dict[str, int]: + candidate = results[0] + + gold = postprocess(doc["answer"]) 
+ + if not gold: + print(doc, candidate, gold) + if is_equiv(candidate, gold): + retval = 1 + else: + retval = 0 + + results = { + "exact_match": retval, + } + return results + + +def is_equiv(str1, str2, verbose=False): + if str1 is None and str2 is None: + print("WARNING: Both None") + return True + if str1 is None or str2 is None: + return False + + str1, str2 = parse_math_answer(str1), parse_math_answer(str2) + + try: + ss1 = _strip_string(str1) + ss1 = postprocess(ss1) + ss2 = _strip_string(str2) + if verbose: + print(ss1, ss2) + return ss1 == ss2 + except Exception: + return str1 == str2 + + +def parse_math_answer(raw_string): + def remove_boxed(s): + left = "\\boxed{" + try: + assert s[: len(left)] == left + assert s[-1] == "}" + answer = s[len(left) : -1] + if "=" in answer: + answer = answer.split("=")[-1].lstrip(" ") + return answer + except Exception: + return None + + def last_boxed_only_string(string): + idx = string.rfind("\\boxed") + if idx < 0: + idx = string.rfind("\\fbox") + if idx < 0: + return None + i = idx + right_brace_idx = None + num_left_braces_open = 0 + while i < len(string): + if string[i] == "{": + num_left_braces_open += 1 + if string[i] == "}": + num_left_braces_open -= 1 + if num_left_braces_open == 0: + right_brace_idx = i + break + i += 1 + + if right_brace_idx is None: + retval = None + else: + retval = string[idx : right_brace_idx + 1] + + return retval + + def get_answer_with_dollar_sign(s): + first_pattern = "\$(.*)\$" + last_match = None + matches = re.findall(first_pattern, s) + if matches: + last_match = matches[-1] + if "=" in last_match: + last_match = last_match.split("=")[-1].lstrip(" ") + return last_match + + def get_answer_without_dollar_sign(s): + last_match = None + if "=" in s: + last_match = s.split("=")[-1].lstrip(" ").rstrip(".") + if "\\n" in last_match: + last_match = last_match.split("\\n")[0] + else: + pattern = "(?:\\$)?\d+(?:\.\d+)?(?![\w\d])" + matches = re.findall(pattern, s) + if matches: + last_match 
= matches[-1] + return last_match + + if "\\boxed" in raw_string: + answer = remove_boxed(last_boxed_only_string(raw_string)) + else: + answer = get_answer_with_dollar_sign(raw_string) + if not answer: + answer = get_answer_without_dollar_sign(raw_string) + return answer + + +# code from https://github.com/hendrycks/math/blob/main/modeling/math_equivalence.py +def _fix_fracs(string): + substrs = string.split("\\frac") + new_str = substrs[0] + if len(substrs) > 1: + substrs = substrs[1:] + for substr in substrs: + new_str += "\\frac" + if substr[0] == "{": + new_str += substr + else: + try: + assert len(substr) >= 2 + except Exception: + return string + a = substr[0] + b = substr[1] + if b != "{": + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}{" + b + "}" + post_substr + else: + new_str += "{" + a + "}{" + b + "}" + else: + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}" + b + post_substr + else: + new_str += "{" + a + "}" + b + string = new_str + return string + + +def _fix_a_slash_b(string): + if len(string.split("/")) != 2: + return string + a = string.split("/")[0] + b = string.split("/")[1] + try: + a = int(a) + b = int(b) + assert string == "{}/{}".format(a, b) + new_string = "\\frac{" + str(a) + "}{" + str(b) + "}" + return new_string + except Exception: + return string + + +def _remove_right_units(string): + # "\\text{ " only ever occurs (at least in the val set) when describing units + if "\\text{ " in string: + splits = string.split("\\text{ ") + assert len(splits) == 2 + return splits[0] + else: + return string + + +def _fix_sqrt(string): + if "\\sqrt" not in string: + return string + splits = string.split("\\sqrt") + new_string = splits[0] + for split in splits[1:]: + if split[0] != "{": + a = split[0] + new_substr = "\\sqrt{" + a + "}" + split[1:] + else: + new_substr = "\\sqrt" + split + new_string += new_substr + return new_string + + +def _strip_string(string): + # linebreaks + string = 
string.replace("\n", "") + # print(string) + + # remove inverse spaces + string = string.replace("\\!", "") + # print(string) + + # replace \\ with \ + string = string.replace("\\\\", "\\") + # print(string) + + # replace tfrac and dfrac with frac + string = string.replace("tfrac", "frac") + string = string.replace("dfrac", "frac") + # print(string) + + # remove \left and \right + string = string.replace("\\left", "") + string = string.replace("\\right", "") + # print(string) + + # Remove circ (degrees) + string = string.replace("^{\\circ}", "") + string = string.replace("^\\circ", "") + + # remove dollar signs + string = string.replace("\\$", "") + + # remove units (on the right) + string = _remove_right_units(string) + + # remove percentage + string = string.replace("\\%", "") + string = string.replace("\%", "") + + # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string + string = string.replace(" .", " 0.") + string = string.replace("{.", "{0.") + # if empty, return empty string + if len(string) == 0: + return string + if string[0] == ".": + string = "0" + string + + # to consider: get rid of e.g. "k = " or "q = " at beginning + if len(string.split("=")) == 2: + if len(string.split("=")[0]) <= 2: + string = string.split("=")[1] + + # fix sqrt3 --> sqrt{3} + string = _fix_sqrt(string) + + # remove spaces + string = string.replace(" ", "") + + # \frac1b or \frac12 --> \frac{1}{b} and \frac{1}{2}, etc. Even works with \frac1{72} (but not \frac{72}1). 
Also does a/b --> \\frac{a}{b} + string = _fix_fracs(string) + + # manually change 0.5 --> \frac{1}{2} + if string == "0.5": + string = "\\frac{1}{2}" + + # NOTE: X/Y changed to \frac{X}{Y} in dataset, but in simple cases fix in case the model output is X/Y + string = _fix_a_slash_b(string) + + return string diff --git a/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml b/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml new file mode 100644 index 00000000..18c53d22 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml @@ -0,0 +1,22 @@ +dataset_path: HAERAE-HUB/HRM8K +output_type: generate_until +test_split: test +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +process_results: !function utils.process_results +num_fewshot: 0 +generation_kwargs: + until: + - "</s>" + - "<|end_of_text|>" + - "<|endoftext|>" + - "<|im_end|>" + max_gen_toks: 512 + do_sample: false + temperature: 0 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml new file mode 100644 index 00000000..17eac64a --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml @@ -0,0 +1,13 @@ +group: hrm8k_en +task: + - hrm8k_gsm8k_en + - hrm8k_ksm_en + - hrm8k_math_en + - hrm8k_mmmlu_en + - hrm8k_omni_math_en +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml new file mode 100644 index 00000000..c2697a0b --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: GSM8K +task: hrm8k_gsm8k_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml new file mode 100644 index 00000000..a5e34d45 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml
+dataset_name: KSM +task: hrm8k_ksm_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml new file mode 100644 index 00000000..ffbdce81 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: MATH +task: hrm8k_math_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml new file mode 100644 index 00000000..812f62e2 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml @@ -0,0 +1,4 @@ +include: _hrm8k_en_yaml +dataset_name: MMMLU +task: hrm8k_mmmlu_en +doc_to_text: !function utils.doc_to_text_mmmlu diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml new file mode 100644 index 00000000..f859de3d --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: OMNI_MATH +task: hrm8k_omni_math_en diff --git a/lm_eval/tasks/hrm8k/en/utils.py b/lm_eval/tasks/hrm8k/en/utils.py new file mode 100644 index 00000000..b67d8e91 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/utils.py @@ -0,0 +1,285 @@ +import re +from typing import Dict, List + + +def doc_to_text(doc): + text = ( + "Solve the given question.\n" + "After solving the problem, state your final answer in the following format: $\\boxed{N}$.\n\n" + f"Question: {doc['original'].strip()}\nAnswer:" + ) + return text + + +def doc_to_text_mmmlu(doc): + text = ( + "Solve the given question.\n" + "After solving the problem, state your final choice among the choices (1, 2, 3, 4) in the following format: $\\boxed{N}$.\n\n" + f"Question: {doc['original'].strip()}\nAnswer:" + ) + return text + + +def doc_to_target(doc): + return postprocess(doc["answer"]) + + +def postprocess(s): + s = str(s).strip() + try: + float_value = float(s) + return str(int(float_value)) if float_value.is_integer() else str(float_value) + except Exception: + return s + + +def 
process_results(doc: dict, results: List[str]) -> Dict[str, int]: + candidate = results[0] + + gold = postprocess(doc["answer"]) + + if not gold: + print(doc, candidate, gold) + if is_equiv(candidate, gold): + retval = 1 + else: + retval = 0 + + results = { + "exact_match": retval, + } + return results + + +def is_equiv(str1, str2, verbose=False): + if str1 is None and str2 is None: + print("WARNING: Both None") + return True + if str1 is None or str2 is None: + return False + + str1, str2 = parse_math_answer(str1), parse_math_answer(str2) + + try: + ss1 = _strip_string(str1) + ss1 = postprocess(ss1) + ss2 = _strip_string(str2) + if verbose: + print(ss1, ss2) + return ss1 == ss2 + except Exception: + return str1 == str2 + + +def parse_math_answer(raw_string): + def remove_boxed(s): + left = "\\boxed{" + try: + assert s[: len(left)] == left + assert s[-1] == "}" + answer = s[len(left) : -1] + if "=" in answer: + answer = answer.split("=")[-1].lstrip(" ") + return answer + except Exception: + return None + + def last_boxed_only_string(string): + idx = string.rfind("\\boxed") + if idx < 0: + idx = string.rfind("\\fbox") + if idx < 0: + return None + i = idx + right_brace_idx = None + num_left_braces_open = 0 + while i < len(string): + if string[i] == "{": + num_left_braces_open += 1 + if string[i] == "}": + num_left_braces_open -= 1 + if num_left_braces_open == 0: + right_brace_idx = i + break + i += 1 + + if right_brace_idx is None: + retval = None + else: + retval = string[idx : right_brace_idx + 1] + + return retval + + def get_answer_with_dollar_sign(s): + first_pattern = "\$(.*)\$" + last_match = None + matches = re.findall(first_pattern, s) + if matches: + last_match = matches[-1] + if "=" in last_match: + last_match = last_match.split("=")[-1].lstrip(" ") + return last_match + + def get_answer_without_dollar_sign(s): + last_match = None + if "=" in s: + last_match = s.split("=")[-1].lstrip(" ").rstrip(".") + if "\\n" in last_match: + last_match = 
last_match.split("\\n")[0] + else: + pattern = "(?:\\$)?\d+(?:\.\d+)?(?![\w\d])" + matches = re.findall(pattern, s) + if matches: + last_match = matches[-1] + return last_match + + if "\\boxed" in raw_string: + answer = remove_boxed(last_boxed_only_string(raw_string)) + else: + answer = get_answer_with_dollar_sign(raw_string) + if not answer: + answer = get_answer_without_dollar_sign(raw_string) + return answer + + +# code from https://github.com/hendrycks/math/blob/main/modeling/math_equivalence.py +def _fix_fracs(string): + substrs = string.split("\\frac") + new_str = substrs[0] + if len(substrs) > 1: + substrs = substrs[1:] + for substr in substrs: + new_str += "\\frac" + if substr[0] == "{": + new_str += substr + else: + try: + assert len(substr) >= 2 + except Exception: + return string + a = substr[0] + b = substr[1] + if b != "{": + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}{" + b + "}" + post_substr + else: + new_str += "{" + a + "}{" + b + "}" + else: + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}" + b + post_substr + else: + new_str += "{" + a + "}" + b + string = new_str + return string + + +def _fix_a_slash_b(string): + if len(string.split("/")) != 2: + return string + a = string.split("/")[0] + b = string.split("/")[1] + try: + a = int(a) + b = int(b) + assert string == "{}/{}".format(a, b) + new_string = "\\frac{" + str(a) + "}{" + str(b) + "}" + return new_string + except Exception: + return string + + +def _remove_right_units(string): + # "\\text{ " only ever occurs (at least in the val set) when describing units + if "\\text{ " in string: + splits = string.split("\\text{ ") + assert len(splits) == 2 + return splits[0] + else: + return string + + +def _fix_sqrt(string): + if "\\sqrt" not in string: + return string + splits = string.split("\\sqrt") + new_string = splits[0] + for split in splits[1:]: + if split[0] != "{": + a = split[0] + new_substr = "\\sqrt{" + a + "}" + split[1:] + else: + 
new_substr = "\\sqrt" + split + new_string += new_substr + return new_string + + +def _strip_string(string): + # linebreaks + string = string.replace("\n", "") + # print(string) + + # remove inverse spaces + string = string.replace("\\!", "") + # print(string) + + # replace \\ with \ + string = string.replace("\\\\", "\\") + # print(string) + + # replace tfrac and dfrac with frac + string = string.replace("tfrac", "frac") + string = string.replace("dfrac", "frac") + # print(string) + + # remove \left and \right + string = string.replace("\\left", "") + string = string.replace("\\right", "") + # print(string) + + # Remove circ (degrees) + string = string.replace("^{\\circ}", "") + string = string.replace("^\\circ", "") + + # remove dollar signs + string = string.replace("\\$", "") + + # remove units (on the right) + string = _remove_right_units(string) + + # remove percentage + string = string.replace("\\%", "") + string = string.replace("\%", "") + + # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string + string = string.replace(" .", " 0.") + string = string.replace("{.", "{0.") + # if empty, return empty string + if len(string) == 0: + return string + if string[0] == ".": + string = "0" + string + + # to consider: get rid of e.g. "k = " or "q = " at beginning + if len(string.split("=")) == 2: + if len(string.split("=")[0]) <= 2: + string = string.split("=")[1] + + # fix sqrt3 --> sqrt{3} + string = _fix_sqrt(string) + + # remove spaces + string = string.replace(" ", "") + + # \frac1b or \frac12 --> \frac{1}{b} and \frac{1}{2}, etc. Even works with \frac1{72} (but not \frac{72}1). 
Also does a/b --> \\frac{a}{b} + string = _fix_fracs(string) + + # manually change 0.5 --> \frac{1}{2} + if string == "0.5": + string = "\\frac{1}{2}" + + # NOTE: X/Y changed to \frac{X}{Y} in dataset, but in simple cases fix in case the model output is X/Y + string = _fix_a_slash_b(string) + + return string -- GitLab From 6dac8c694b0065e9b29ea2499bec516f9b759952 Mon Sep 17 00:00:00 2001 From: Boda Sadallah Date: Tue, 21 Jan 2025 00:46:18 +0400 Subject: [PATCH 04/19] New arabicmmlu (#2541) * point to the original ArabicMMLU dataset * create the new subtasks files * fix bug when the context filed is empty --- lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml | 2 +- .../arabicmmlu/_arabicmmlu_humanities.yaml | 2 +- .../arabicmmlu/_arabicmmlu_language.yaml | 2 +- .../tasks/arabicmmlu/_arabicmmlu_other.yaml | 2 +- .../_arabicmmlu_social_science.yaml | 2 +- .../tasks/arabicmmlu/_arabicmmlu_stem.yaml | 2 +- .../_default_arabicmmlu_template_yaml | 4 +- lm_eval/tasks/arabicmmlu/_generate_configs.py | 91 +++++++++---------- .../arabicmmlu_accounting_university.yaml | 5 + .../arabicmmlu_arabic_language_general.yaml | 4 +- .../arabicmmlu_arabic_language_grammar.yaml | 4 +- ...rabicmmlu_arabic_language_high_school.yaml | 5 + ...bicmmlu_arabic_language_middle_school.yaml | 5 + ...icmmlu_arabic_language_primary_school.yaml | 5 + .../arabicmmlu_biology_high_school.yaml | 5 + .../arabicmmlu_civics_high_school.yaml | 5 + .../arabicmmlu_civics_middle_school.yaml | 5 + ...abicmmlu_computer_science_high_school.yaml | 5 + ...icmmlu_computer_science_middle_school.yaml | 5 + ...cmmlu_computer_science_primary_school.yaml | 5 + ...rabicmmlu_computer_science_university.yaml | 5 + .../arabicmmlu/arabicmmlu_driving_test.yaml | 2 +- .../arabicmmlu_economics_high_school.yaml | 5 + .../arabicmmlu_economics_middle_school.yaml | 5 + .../arabicmmlu_economics_university.yaml | 5 + .../arabicmmlu_general_knowledge.yaml | 2 +- ...cmmlu_general_knowledge_middle_school.yaml | 5 + 
...mmlu_general_knowledge_primary_school.yaml | 5 + .../arabicmmlu_geography_high_school.yaml | 5 + .../arabicmmlu_geography_middle_school.yaml | 5 + .../arabicmmlu_geography_primary_school.yaml | 5 + .../arabicmmlu_high_arabic_language.yaml | 5 - .../arabicmmlu/arabicmmlu_high_biology.yaml | 5 - .../arabicmmlu/arabicmmlu_high_civics.yaml | 5 - .../arabicmmlu_high_computer_science.yaml | 5 - .../arabicmmlu/arabicmmlu_high_economics.yaml | 5 - .../arabicmmlu/arabicmmlu_high_geography.yaml | 5 - .../arabicmmlu/arabicmmlu_high_history.yaml | 5 - .../arabicmmlu_high_islamic_studies.yaml | 5 - .../arabicmmlu_high_philosophy.yaml | 5 - .../arabicmmlu/arabicmmlu_high_physics.yaml | 5 - .../arabicmmlu_history_high_school.yaml | 5 + .../arabicmmlu_history_middle_school.yaml | 5 + .../arabicmmlu_history_primary_school.yaml | 5 + .../arabicmmlu_islamic_studies.yaml | 2 +- ...rabicmmlu_islamic_studies_high_school.yaml | 5 + ...bicmmlu_islamic_studies_middle_school.yaml | 5 + ...icmmlu_islamic_studies_primary_school.yaml | 5 + .../arabicmmlu_law_professional.yaml | 5 + .../arabicmmlu_management_university.yaml | 5 + .../arabicmmlu_math_primary_school.yaml | 5 + .../arabicmmlu_middle_arabic_language.yaml | 5 - .../arabicmmlu/arabicmmlu_middle_civics.yaml | 5 - .../arabicmmlu_middle_computer_science.yaml | 5 - .../arabicmmlu_middle_economics.yaml | 5 - .../arabicmmlu_middle_general_knowledge.yaml | 5 - .../arabicmmlu_middle_geography.yaml | 5 - .../arabicmmlu/arabicmmlu_middle_history.yaml | 5 - .../arabicmmlu_middle_islamic_studies.yaml | 5 - .../arabicmmlu_middle_natural_science.yaml | 5 - .../arabicmmlu_middle_social_science.yaml | 5 - ...bicmmlu_natural_science_middle_school.yaml | 5 + ...icmmlu_natural_science_primary_school.yaml | 5 + .../arabicmmlu_philosophy_high_school.yaml | 5 + .../arabicmmlu_physics_high_school.yaml | 5 + ...abicmmlu_political_science_university.yaml | 5 + .../arabicmmlu_primary_arabic_language.yaml | 5 - .../arabicmmlu_primary_computer_science.yaml | 
5 - .../arabicmmlu_primary_general_knowledge.yaml | 5 - .../arabicmmlu_primary_geography.yaml | 5 - .../arabicmmlu_primary_history.yaml | 5 - .../arabicmmlu_primary_islamic_studies.yaml | 5 - .../arabicmmlu/arabicmmlu_primary_math.yaml | 5 - .../arabicmmlu_primary_natural_science.yaml | 5 - .../arabicmmlu_primary_social_science.yaml | 5 - .../tasks/arabicmmlu/arabicmmlu_prof_law.yaml | 5 - ...abicmmlu_social_science_middle_school.yaml | 5 + ...bicmmlu_social_science_primary_school.yaml | 5 + .../arabicmmlu_univ_accounting.yaml | 5 - .../arabicmmlu_univ_computer_science.yaml | 5 - .../arabicmmlu/arabicmmlu_univ_economics.yaml | 5 - .../arabicmmlu_univ_management.yaml | 5 - .../arabicmmlu_univ_political_science.yaml | 5 - lm_eval/tasks/arabicmmlu/utils.py | 2 +- 84 files changed, 236 insertions(+), 237 deletions(-) create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml create mode 100644 
lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml create mode 100644 
lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml delete mode 100644 
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml index 58cf795a..08ed9bb0 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml @@ -9,4 +9,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml index 6f61004a..b52bc804 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml index 90e57ae0..d9f62abc 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml index 3e989b8c..d96dc0bd 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml +++ 
b/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml index 1ece047b..b40e7c80 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml index a464a62a..5065d0bd 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml b/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml index eac23577..471c0fc0 100644 --- a/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml +++ b/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml @@ -1,4 +1,4 @@ -dataset_path: yazeed7/ArabicMMLU +dataset_path: MBZUAI/ArabicMMLU test_split: test fewshot_split: dev fewshot_config: @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 0.0 + version: 1.0 diff --git a/lm_eval/tasks/arabicmmlu/_generate_configs.py b/lm_eval/tasks/arabicmmlu/_generate_configs.py index 4d091e12..ea59fe98 100644 --- a/lm_eval/tasks/arabicmmlu/_generate_configs.py +++ b/lm_eval/tasks/arabicmmlu/_generate_configs.py @@ -13,48 +13,46 @@ from tqdm import tqdm eval_logger = logging.getLogger("lm-eval") -SUBJECTS = { - "Driving Test": "other", - "High Geography": "social_science", - "High History": "humanities", - "Islamic Studies": "humanities", - "Univ Accounting": "social_science", - "Primary General Knowledge": 
"other", - "Univ Political Science": "social_science", - "Primary Math": "stem", - "Middle General Knowledge": "other", - "High Biology": "stem", - "Primary Natural Science": "stem", - "High Economics": "social_science", - "Middle Natural Science": "stem", - "Middle Geography": "social_science", - "Primary Social Science": "social_science", - "Middle Computer Science": "stem", - "Middle Islamic Studies": "humanities", - "Primary Computer Science": "stem", - "High Physics": "stem", - "Middle Social Science": "social_science", - "Middle Civics": "social_science", - "High Computer Science": "stem", - "General Knowledge": "other", - "High Civics": "social_science", - "Prof Law": "humanities", - "High Islamic Studies": "humanities", - "Primary Arabic Language": "language", - "High Arabic Language": "language", - "Arabic Language (Grammar)": "language", - "Primary History": "humanities", - "Middle History": "humanities", - "Univ Economics": "social_science", - "Arabic Language (General)": "language", - "Univ Computer Science": "stem", - "Primary Islamic Studies": "humanities", - "Primary Geography": "social_science", - "High Philosophy": "humanities", - "Middle Arabic Language": "language", - "Middle Economics": "social_science", - "Univ Management": "other", -} +SUBJECTS = {'Islamic Studies': 'humanities', + 'Driving Test': 'other', + 'Natural Science (Middle School)': 'stem', + 'Natural Science (Primary School)': 'stem', + 'History (Primary School)': 'humanities', + 'History (Middle School)': 'humanities', + 'History (High School)': 'humanities', + 'General Knowledge': 'other', + 'General Knowledge (Primary School)': 'other', + 'General Knowledge (Middle School)': 'other', + 'Law (Professional)': 'humanities', + 'Physics (High School)': 'stem', + 'Social Science (Middle School)': 'social_science', + 'Social Science (Primary School)': 'social_science', + 'Management (University)': 'other', + 'Arabic Language (Primary School)': 'language', + 'Arabic Language (Middle 
School)': 'language', + 'Arabic Language (High School)': 'language', + 'Political Science (University)': 'social_science', + 'Philosophy (High School)': 'humanities', + 'Accounting (University)': 'social_science', + 'Computer Science (University)': 'stem', + 'Computer Science (Middle School)': 'stem', + 'Computer Science (Primary School)': 'stem', + 'Computer Science (High School)': 'stem', + 'Geography (Primary School)': 'social_science', + 'Geography (Middle School)': 'social_science', + 'Geography (High School)': 'social_science', + 'Math (Primary School)': 'stem', + 'Biology (High School)': 'stem', + 'Economics (University)': 'social_science', + 'Economics (Middle School)': 'social_science', + 'Economics (High School)': 'social_science', + 'Arabic Language (General)': 'language', + 'Arabic Language (Grammar)': 'language', + 'Islamic Studies (High School)': 'humanities', + 'Islamic Studies (Middle School)': 'humanities', + 'Islamic Studies (Primary School)': 'humanities', + 'Civics (Middle School)': 'social_science', + 'Civics (High School)': 'social_science'} def parse_args(): @@ -69,8 +67,9 @@ if __name__ == "__main__": # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs. 
base_yaml_name = os.path.split(args.base_yaml_path)[-1] - with open(args.base_yaml_path, encoding="utf-8") as f: - base_yaml = yaml.full_load(f) + + # with open(args.base_yaml_path, encoding="utf-8") as f: + # base_yaml = yaml.full_load(f) ALL_CATEGORIES = [] for subject, category in tqdm(SUBJECTS.items()): @@ -81,8 +80,8 @@ if __name__ == "__main__": yaml_dict = { "include": base_yaml_name, - "tag": f"arabicmmlu_{category}", - "task": f"arabicmmlu_{subject.lower().replace(' ', '_')}", + "tag": f"arabicmmlu_{category}_tasks", + "task": f"arabicmmlu_{subject.lower().replace(' ', '_').replace('(', '').replace(')', '')}", "task_alias": subject, "dataset_name": subject, # "description": description, diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml new file mode 100644 index 00000000..7ec8caad --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Accounting (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_accounting_university" +"task_alias": "Accounting (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml index f57dc08c..621312d9 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml @@ -1,5 +1,5 @@ "dataset_name": "Arabic Language (General)" -"tag": "arabicmmlu_language_tasks" "include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_arabic_language_(general)" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_general" "task_alias": "Arabic Language (General)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml index 
baf32676..0511b9d9 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml @@ -1,5 +1,5 @@ "dataset_name": "Arabic Language (Grammar)" -"tag": "arabicmmlu_language_tasks" "include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_arabic_language_(grammar)" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_grammar" "task_alias": "Arabic Language (Grammar)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml new file mode 100644 index 00000000..77dc002b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_high_school" +"task_alias": "Arabic Language (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml new file mode 100644 index 00000000..9b9b2007 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_middle_school" +"task_alias": "Arabic Language (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml new file mode 100644 index 00000000..3c0f045d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": 
"arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_primary_school" +"task_alias": "Arabic Language (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml new file mode 100644 index 00000000..865a477d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Biology (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_biology_high_school" +"task_alias": "Biology (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml new file mode 100644 index 00000000..6f81e922 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Civics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_civics_high_school" +"task_alias": "Civics (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml new file mode 100644 index 00000000..3e82c777 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Civics (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_civics_middle_school" +"task_alias": "Civics (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml new file mode 100644 index 00000000..59aa929d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (High School)" +"include": 
"_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_high_school" +"task_alias": "Computer Science (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml new file mode 100644 index 00000000..3ecdc106 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_middle_school" +"task_alias": "Computer Science (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml new file mode 100644 index 00000000..8feec4aa --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_primary_school" +"task_alias": "Computer Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml new file mode 100644 index 00000000..327cfab6 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_university" +"task_alias": "Computer Science (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml index d40c9eb9..ab951dfc 100644 --- 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml @@ -1,5 +1,5 @@ "dataset_name": "Driving Test" -"tag": "arabicmmlu_other_tasks" "include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" "task": "arabicmmlu_driving_test" "task_alias": "Driving Test" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml new file mode 100644 index 00000000..78cba021 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_high_school" +"task_alias": "Economics (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml new file mode 100644 index 00000000..ed004b34 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_middle_school" +"task_alias": "Economics (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml new file mode 100644 index 00000000..76bfe4f1 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_university" +"task_alias": "Economics (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml index 
fbd8839d..8ac6e710 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml @@ -1,5 +1,5 @@ "dataset_name": "General Knowledge" -"tag": "arabicmmlu_other_tasks" "include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" "task": "arabicmmlu_general_knowledge" "task_alias": "General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml new file mode 100644 index 00000000..a6e4b7c9 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "General Knowledge (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_general_knowledge_middle_school" +"task_alias": "General Knowledge (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml new file mode 100644 index 00000000..07358299 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "General Knowledge (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_general_knowledge_primary_school" +"task_alias": "General Knowledge (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml new file mode 100644 index 00000000..b6264fc4 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_high_school" +"task_alias": "Geography 
(High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml new file mode 100644 index 00000000..6483749f --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_middle_school" +"task_alias": "Geography (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml new file mode 100644 index 00000000..1465fb05 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_primary_school" +"task_alias": "Geography (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml deleted file mode 100644 index 17d17bc8..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_arabic_language" -"task_alias": "High Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml deleted file mode 100644 index 2b5baf0b..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Biology" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_biology" -"task_alias": "High Biology" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml deleted file mode 100644 index 87050922..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Civics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_civics" -"task_alias": "High Civics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml deleted file mode 100644 index f1a66a5c..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_computer_science" -"task_alias": "High Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml deleted file mode 100644 index a1d6e90f..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_economics" -"task_alias": "High Economics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml deleted file mode 100644 index ad980432..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_geography" -"task_alias": "High Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml deleted file mode 100644 
index 49c82669..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_history" -"task_alias": "High History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml deleted file mode 100644 index 15b5358b..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_islamic_studies" -"task_alias": "High Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml deleted file mode 100644 index e0b20e30..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Philosophy" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_philosophy" -"task_alias": "High Philosophy" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml deleted file mode 100644 index a7fe5ecc..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Physics" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_physics" -"task_alias": "High Physics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml new file mode 100644 index 00000000..b97a081a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": 
"History (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_high_school" +"task_alias": "History (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml new file mode 100644 index 00000000..3435604a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "History (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_middle_school" +"task_alias": "History (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml new file mode 100644 index 00000000..c156ff52 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "History (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_primary_school" +"task_alias": "History (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml index bacd5ace..4d5020a5 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml @@ -1,5 +1,5 @@ "dataset_name": "Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" "include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" "task": "arabicmmlu_islamic_studies" "task_alias": "Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml new file mode 100644 index 00000000..5bae042f --- /dev/null +++ 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_high_school" +"task_alias": "Islamic Studies (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml new file mode 100644 index 00000000..af192fc1 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_middle_school" +"task_alias": "Islamic Studies (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml new file mode 100644 index 00000000..c4e5d354 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_primary_school" +"task_alias": "Islamic Studies (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml new file mode 100644 index 00000000..5e2b6a4a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Law (Professional)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_law_professional" +"task_alias": "Law (Professional)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml new file mode 100644 index 00000000..386c8e6b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Management (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_management_university" +"task_alias": "Management (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml new file mode 100644 index 00000000..1df99b8a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Math (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_math_primary_school" +"task_alias": "Math (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml deleted file mode 100644 index 14a2ab1a..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_arabic_language" -"task_alias": "Middle Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml deleted file mode 100644 index 44ba95d4..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Civics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_civics" -"task_alias": "Middle Civics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml deleted file mode 100644 index 8dd4136f..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_computer_science" -"task_alias": "Middle Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml deleted file mode 100644 index 312fa2e3..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_economics" -"task_alias": "Middle Economics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml deleted file mode 100644 index c359d85a..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle General Knowledge" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_general_knowledge" -"task_alias": "Middle General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml deleted file mode 100644 index 111b13cf..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_geography" -"task_alias": "Middle Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml deleted file mode 100644 index 615a2e51..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_history" -"task_alias": "Middle History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml deleted file mode 100644 index 44922360..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_islamic_studies" -"task_alias": "Middle Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml deleted file mode 100644 index 265cdbaa..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Natural Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_natural_science" -"task_alias": "Middle Natural Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml deleted file mode 100644 index 84c247dd..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Social Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_social_science" -"task_alias": "Middle Social Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml new file mode 100644 index 00000000..3b61531d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Natural Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_natural_science_middle_school" +"task_alias": "Natural Science (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml new file mode 100644 index 00000000..1efd6c9b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Natural Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_natural_science_primary_school" +"task_alias": "Natural Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml new file mode 100644 index 00000000..66715bb0 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Philosophy (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_philosophy_high_school" +"task_alias": "Philosophy (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml new file mode 100644 index 00000000..00ecf8ad --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Physics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_physics_high_school" +"task_alias": "Physics (High School)" diff 
--git a/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml new file mode 100644 index 00000000..1f64125f --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Political Science (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_political_science_university" +"task_alias": "Political Science (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml deleted file mode 100644 index 700bc078..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_arabic_language" -"task_alias": "Primary Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml deleted file mode 100644 index b89089cd..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_computer_science" -"task_alias": "Primary Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml deleted file mode 100644 index 85dd0b7f..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary General Knowledge" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": 
"arabicmmlu_primary_general_knowledge" -"task_alias": "Primary General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml deleted file mode 100644 index f7efc487..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_geography" -"task_alias": "Primary Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml deleted file mode 100644 index f7d69ca9..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_history" -"task_alias": "Primary History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml deleted file mode 100644 index b36cd640..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_islamic_studies" -"task_alias": "Primary Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml deleted file mode 100644 index 0e53adcf..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Math" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_math" -"task_alias": "Primary Math" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml deleted file mode 100644 index 4e208c76..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Natural Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_natural_science" -"task_alias": "Primary Natural Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml deleted file mode 100644 index fee4fe5d..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Social Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_social_science" -"task_alias": "Primary Social Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml deleted file mode 100644 index 20bf6c5f..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Prof Law" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_prof_law" -"task_alias": "Prof Law" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml new file mode 100644 index 00000000..b876649f --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Social Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_social_science_middle_school" +"task_alias": "Social Science (Middle School)" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml new file mode 100644 index 00000000..6f688480 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Social Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_social_science_primary_school" +"task_alias": "Social Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml deleted file mode 100644 index 6d1d9412..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Accounting" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_accounting" -"task_alias": "Univ Accounting" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml deleted file mode 100644 index 42e7e89a..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_computer_science" -"task_alias": "Univ Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml deleted file mode 100644 index 21015ffa..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_economics" -"task_alias": "Univ Economics" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml deleted file mode 100644 index e69ad74b..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Management" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_management" -"task_alias": "Univ Management" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml deleted file mode 100644 index bb85a104..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Political Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_political_science" -"task_alias": "Univ Political Science" diff --git a/lm_eval/tasks/arabicmmlu/utils.py b/lm_eval/tasks/arabicmmlu/utils.py index 2c476131..a572489e 100644 --- a/lm_eval/tasks/arabicmmlu/utils.py +++ b/lm_eval/tasks/arabicmmlu/utils.py @@ -23,7 +23,7 @@ def doc_to_text(doc): question = ( doc["Question"] - if doc["Context"] == "" + if not doc["Context"] else f"{doc['Context']}\n\n{doc['Question']}" ) -- GitLab From 3a4e46741749a8c6d7f702e015285653bc1acdb0 Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Tue, 21 Jan 2025 06:04:00 +0900 Subject: [PATCH 05/19] apply precommit (#2636) --- lm_eval/tasks/global_mmlu/README.md | 19 ++++- .../global_mmlu/{ => default}/_default_yaml | 0 .../{ => default}/_generate_configs.py | 0 .../{ => default}/global_mmlu_ar.yaml | 0 .../{ => default}/global_mmlu_bn.yaml | 0 .../{ => default}/global_mmlu_de.yaml | 0 .../{ => default}/global_mmlu_en.yaml | 0 .../{ => default}/global_mmlu_es.yaml | 0 .../{ => default}/global_mmlu_fr.yaml | 0 .../{ => default}/global_mmlu_hi.yaml | 0 .../{ => default}/global_mmlu_id.yaml | 0 .../{ => 
default}/global_mmlu_it.yaml | 0 .../{ => default}/global_mmlu_ja.yaml | 0 .../{ => default}/global_mmlu_ko.yaml | 0 .../{ => default}/global_mmlu_pt.yaml | 0 .../{ => default}/global_mmlu_sw.yaml | 0 .../{ => default}/global_mmlu_yo.yaml | 0 .../{ => default}/global_mmlu_zh.yaml | 0 .../global_mmlu/full/am/_am_template_yaml | 16 ++++ .../full/am/_global_mmlu_full_am.yaml | 11 +++ .../am/_global_mmlu_full_am_humanities.yaml | 8 ++ .../full/am/_global_mmlu_full_am_other.yaml | 8 ++ .../_global_mmlu_full_am_social_sciences.yaml | 8 ++ .../full/am/_global_mmlu_full_am_stem.yaml | 8 ++ .../global_mmlu_full_am_abstract_algebra.yaml | 5 ++ .../full/am/global_mmlu_full_am_anatomy.yaml | 5 ++ .../am/global_mmlu_full_am_astronomy.yaml | 5 ++ .../global_mmlu_full_am_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_am_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_am_college_biology.yaml | 5 ++ ...global_mmlu_full_am_college_chemistry.yaml | 5 ++ ...mmlu_full_am_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_am_college_mathematics.yaml | 5 ++ .../global_mmlu_full_am_college_medicine.yaml | 5 ++ .../global_mmlu_full_am_college_physics.yaml | 5 ++ ...global_mmlu_full_am_computer_security.yaml | 5 ++ ...lobal_mmlu_full_am_conceptual_physics.yaml | 5 ++ .../am/global_mmlu_full_am_econometrics.yaml | 5 ++ ...l_mmlu_full_am_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_am_elementary_mathematics.yaml | 5 ++ .../am/global_mmlu_full_am_formal_logic.yaml | 5 ++ .../am/global_mmlu_full_am_global_facts.yaml | 5 ++ ...obal_mmlu_full_am_high_school_biology.yaml | 5 ++ ...al_mmlu_full_am_high_school_chemistry.yaml | 5 ++ ..._full_am_high_school_computer_science.yaml | 5 ++ ..._full_am_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_am_high_school_geography.yaml | 5 ++ ...m_high_school_government_and_politics.yaml | 5 ++ ...lu_full_am_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_am_high_school_mathematics.yaml | 5 ++ 
...lu_full_am_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_am_high_school_physics.yaml | 5 ++ ...l_mmlu_full_am_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_am_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_am_high_school_us_history.yaml | 5 ++ ...mlu_full_am_high_school_world_history.yaml | 5 ++ .../am/global_mmlu_full_am_human_aging.yaml | 5 ++ .../global_mmlu_full_am_human_sexuality.yaml | 5 ++ ...global_mmlu_full_am_international_law.yaml | 5 ++ .../am/global_mmlu_full_am_jurisprudence.yaml | 5 ++ ...global_mmlu_full_am_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_am_machine_learning.yaml | 5 ++ .../am/global_mmlu_full_am_management.yaml | 5 ++ .../am/global_mmlu_full_am_marketing.yaml | 5 ++ .../global_mmlu_full_am_medical_genetics.yaml | 5 ++ .../am/global_mmlu_full_am_miscellaneous.yaml | 5 ++ .../global_mmlu_full_am_moral_disputes.yaml | 5 ++ .../global_mmlu_full_am_moral_scenarios.yaml | 5 ++ .../am/global_mmlu_full_am_nutrition.yaml | 5 ++ .../am/global_mmlu_full_am_philosophy.yaml | 5 ++ .../am/global_mmlu_full_am_prehistory.yaml | 5 ++ ..._mmlu_full_am_professional_accounting.yaml | 5 ++ .../global_mmlu_full_am_professional_law.yaml | 5 ++ ...al_mmlu_full_am_professional_medicine.yaml | 5 ++ ..._mmlu_full_am_professional_psychology.yaml | 5 ++ .../global_mmlu_full_am_public_relations.yaml | 5 ++ .../global_mmlu_full_am_security_studies.yaml | 5 ++ .../am/global_mmlu_full_am_sociology.yaml | 5 ++ ...global_mmlu_full_am_us_foreign_policy.yaml | 5 ++ .../full/am/global_mmlu_full_am_virology.yaml | 5 ++ .../global_mmlu_full_am_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/am/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/ar/_ar_template_yaml | 16 ++++ .../full/ar/_global_mmlu_full_ar.yaml | 11 +++ .../ar/_global_mmlu_full_ar_humanities.yaml | 8 ++ .../full/ar/_global_mmlu_full_ar_other.yaml | 8 ++ .../_global_mmlu_full_ar_social_sciences.yaml | 8 ++ .../full/ar/_global_mmlu_full_ar_stem.yaml | 8 ++ 
.../global_mmlu_full_ar_abstract_algebra.yaml | 5 ++ .../full/ar/global_mmlu_full_ar_anatomy.yaml | 5 ++ .../ar/global_mmlu_full_ar_astronomy.yaml | 5 ++ .../global_mmlu_full_ar_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ar_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ar_college_biology.yaml | 5 ++ ...global_mmlu_full_ar_college_chemistry.yaml | 5 ++ ...mmlu_full_ar_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ar_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ar_college_medicine.yaml | 5 ++ .../global_mmlu_full_ar_college_physics.yaml | 5 ++ ...global_mmlu_full_ar_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ar_conceptual_physics.yaml | 5 ++ .../ar/global_mmlu_full_ar_econometrics.yaml | 5 ++ ...l_mmlu_full_ar_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ar_elementary_mathematics.yaml | 5 ++ .../ar/global_mmlu_full_ar_formal_logic.yaml | 5 ++ .../ar/global_mmlu_full_ar_global_facts.yaml | 5 ++ ...obal_mmlu_full_ar_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ar_high_school_chemistry.yaml | 5 ++ ..._full_ar_high_school_computer_science.yaml | 5 ++ ..._full_ar_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ar_high_school_geography.yaml | 5 ++ ...r_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ar_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ar_high_school_mathematics.yaml | 5 ++ ...lu_full_ar_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ar_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ar_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ar_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ar_high_school_us_history.yaml | 5 ++ ...mlu_full_ar_high_school_world_history.yaml | 5 ++ .../ar/global_mmlu_full_ar_human_aging.yaml | 5 ++ .../global_mmlu_full_ar_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ar_international_law.yaml | 5 ++ .../ar/global_mmlu_full_ar_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ar_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ar_machine_learning.yaml | 5 ++ 
.../ar/global_mmlu_full_ar_management.yaml | 5 ++ .../ar/global_mmlu_full_ar_marketing.yaml | 5 ++ .../global_mmlu_full_ar_medical_genetics.yaml | 5 ++ .../ar/global_mmlu_full_ar_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ar_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ar_moral_scenarios.yaml | 5 ++ .../ar/global_mmlu_full_ar_nutrition.yaml | 5 ++ .../ar/global_mmlu_full_ar_philosophy.yaml | 5 ++ .../ar/global_mmlu_full_ar_prehistory.yaml | 5 ++ ..._mmlu_full_ar_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ar_professional_law.yaml | 5 ++ ...al_mmlu_full_ar_professional_medicine.yaml | 5 ++ ..._mmlu_full_ar_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ar_public_relations.yaml | 5 ++ .../global_mmlu_full_ar_security_studies.yaml | 5 ++ .../ar/global_mmlu_full_ar_sociology.yaml | 5 ++ ...global_mmlu_full_ar_us_foreign_policy.yaml | 5 ++ .../full/ar/global_mmlu_full_ar_virology.yaml | 5 ++ .../global_mmlu_full_ar_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ar/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/bn/_bn_template_yaml | 16 ++++ .../full/bn/_global_mmlu_full_bn.yaml | 11 +++ .../bn/_global_mmlu_full_bn_humanities.yaml | 8 ++ .../full/bn/_global_mmlu_full_bn_other.yaml | 8 ++ .../_global_mmlu_full_bn_social_sciences.yaml | 8 ++ .../full/bn/_global_mmlu_full_bn_stem.yaml | 8 ++ .../global_mmlu_full_bn_abstract_algebra.yaml | 5 ++ .../full/bn/global_mmlu_full_bn_anatomy.yaml | 5 ++ .../bn/global_mmlu_full_bn_astronomy.yaml | 5 ++ .../global_mmlu_full_bn_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_bn_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_bn_college_biology.yaml | 5 ++ ...global_mmlu_full_bn_college_chemistry.yaml | 5 ++ ...mmlu_full_bn_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_bn_college_mathematics.yaml | 5 ++ .../global_mmlu_full_bn_college_medicine.yaml | 5 ++ .../global_mmlu_full_bn_college_physics.yaml | 5 ++ ...global_mmlu_full_bn_computer_security.yaml | 5 ++ 
...lobal_mmlu_full_bn_conceptual_physics.yaml | 5 ++ .../bn/global_mmlu_full_bn_econometrics.yaml | 5 ++ ...l_mmlu_full_bn_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_bn_elementary_mathematics.yaml | 5 ++ .../bn/global_mmlu_full_bn_formal_logic.yaml | 5 ++ .../bn/global_mmlu_full_bn_global_facts.yaml | 5 ++ ...obal_mmlu_full_bn_high_school_biology.yaml | 5 ++ ...al_mmlu_full_bn_high_school_chemistry.yaml | 5 ++ ..._full_bn_high_school_computer_science.yaml | 5 ++ ..._full_bn_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_bn_high_school_geography.yaml | 5 ++ ...n_high_school_government_and_politics.yaml | 5 ++ ...lu_full_bn_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_bn_high_school_mathematics.yaml | 5 ++ ...lu_full_bn_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_bn_high_school_physics.yaml | 5 ++ ...l_mmlu_full_bn_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_bn_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_bn_high_school_us_history.yaml | 5 ++ ...mlu_full_bn_high_school_world_history.yaml | 5 ++ .../bn/global_mmlu_full_bn_human_aging.yaml | 5 ++ .../global_mmlu_full_bn_human_sexuality.yaml | 5 ++ ...global_mmlu_full_bn_international_law.yaml | 5 ++ .../bn/global_mmlu_full_bn_jurisprudence.yaml | 5 ++ ...global_mmlu_full_bn_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_bn_machine_learning.yaml | 5 ++ .../bn/global_mmlu_full_bn_management.yaml | 5 ++ .../bn/global_mmlu_full_bn_marketing.yaml | 5 ++ .../global_mmlu_full_bn_medical_genetics.yaml | 5 ++ .../bn/global_mmlu_full_bn_miscellaneous.yaml | 5 ++ .../global_mmlu_full_bn_moral_disputes.yaml | 5 ++ .../global_mmlu_full_bn_moral_scenarios.yaml | 5 ++ .../bn/global_mmlu_full_bn_nutrition.yaml | 5 ++ .../bn/global_mmlu_full_bn_philosophy.yaml | 5 ++ .../bn/global_mmlu_full_bn_prehistory.yaml | 5 ++ ..._mmlu_full_bn_professional_accounting.yaml | 5 ++ .../global_mmlu_full_bn_professional_law.yaml | 5 ++ ...al_mmlu_full_bn_professional_medicine.yaml | 5 ++ 
..._mmlu_full_bn_professional_psychology.yaml | 5 ++ .../global_mmlu_full_bn_public_relations.yaml | 5 ++ .../global_mmlu_full_bn_security_studies.yaml | 5 ++ .../bn/global_mmlu_full_bn_sociology.yaml | 5 ++ ...global_mmlu_full_bn_us_foreign_policy.yaml | 5 ++ .../full/bn/global_mmlu_full_bn_virology.yaml | 5 ++ .../global_mmlu_full_bn_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/bn/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/cs/_cs_template_yaml | 16 ++++ .../full/cs/_global_mmlu_full_cs.yaml | 11 +++ .../cs/_global_mmlu_full_cs_humanities.yaml | 8 ++ .../full/cs/_global_mmlu_full_cs_other.yaml | 8 ++ .../_global_mmlu_full_cs_social_sciences.yaml | 8 ++ .../full/cs/_global_mmlu_full_cs_stem.yaml | 8 ++ .../global_mmlu_full_cs_abstract_algebra.yaml | 5 ++ .../full/cs/global_mmlu_full_cs_anatomy.yaml | 5 ++ .../cs/global_mmlu_full_cs_astronomy.yaml | 5 ++ .../global_mmlu_full_cs_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_cs_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_cs_college_biology.yaml | 5 ++ ...global_mmlu_full_cs_college_chemistry.yaml | 5 ++ ...mmlu_full_cs_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_cs_college_mathematics.yaml | 5 ++ .../global_mmlu_full_cs_college_medicine.yaml | 5 ++ .../global_mmlu_full_cs_college_physics.yaml | 5 ++ ...global_mmlu_full_cs_computer_security.yaml | 5 ++ ...lobal_mmlu_full_cs_conceptual_physics.yaml | 5 ++ .../cs/global_mmlu_full_cs_econometrics.yaml | 5 ++ ...l_mmlu_full_cs_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_cs_elementary_mathematics.yaml | 5 ++ .../cs/global_mmlu_full_cs_formal_logic.yaml | 5 ++ .../cs/global_mmlu_full_cs_global_facts.yaml | 5 ++ ...obal_mmlu_full_cs_high_school_biology.yaml | 5 ++ ...al_mmlu_full_cs_high_school_chemistry.yaml | 5 ++ ..._full_cs_high_school_computer_science.yaml | 5 ++ ..._full_cs_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_cs_high_school_geography.yaml | 5 ++ ...s_high_school_government_and_politics.yaml | 5 ++ 
...lu_full_cs_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_cs_high_school_mathematics.yaml | 5 ++ ...lu_full_cs_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_cs_high_school_physics.yaml | 5 ++ ...l_mmlu_full_cs_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_cs_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_cs_high_school_us_history.yaml | 5 ++ ...mlu_full_cs_high_school_world_history.yaml | 5 ++ .../cs/global_mmlu_full_cs_human_aging.yaml | 5 ++ .../global_mmlu_full_cs_human_sexuality.yaml | 5 ++ ...global_mmlu_full_cs_international_law.yaml | 5 ++ .../cs/global_mmlu_full_cs_jurisprudence.yaml | 5 ++ ...global_mmlu_full_cs_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_cs_machine_learning.yaml | 5 ++ .../cs/global_mmlu_full_cs_management.yaml | 5 ++ .../cs/global_mmlu_full_cs_marketing.yaml | 5 ++ .../global_mmlu_full_cs_medical_genetics.yaml | 5 ++ .../cs/global_mmlu_full_cs_miscellaneous.yaml | 5 ++ .../global_mmlu_full_cs_moral_disputes.yaml | 5 ++ .../global_mmlu_full_cs_moral_scenarios.yaml | 5 ++ .../cs/global_mmlu_full_cs_nutrition.yaml | 5 ++ .../cs/global_mmlu_full_cs_philosophy.yaml | 5 ++ .../cs/global_mmlu_full_cs_prehistory.yaml | 5 ++ ..._mmlu_full_cs_professional_accounting.yaml | 5 ++ .../global_mmlu_full_cs_professional_law.yaml | 5 ++ ...al_mmlu_full_cs_professional_medicine.yaml | 5 ++ ..._mmlu_full_cs_professional_psychology.yaml | 5 ++ .../global_mmlu_full_cs_public_relations.yaml | 5 ++ .../global_mmlu_full_cs_security_studies.yaml | 5 ++ .../cs/global_mmlu_full_cs_sociology.yaml | 5 ++ ...global_mmlu_full_cs_us_foreign_policy.yaml | 5 ++ .../full/cs/global_mmlu_full_cs_virology.yaml | 5 ++ .../global_mmlu_full_cs_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/cs/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/de/_de_template_yaml | 16 ++++ .../full/de/_global_mmlu_full_de.yaml | 11 +++ .../de/_global_mmlu_full_de_humanities.yaml | 8 ++ .../full/de/_global_mmlu_full_de_other.yaml | 8 ++ 
.../_global_mmlu_full_de_social_sciences.yaml | 8 ++ .../full/de/_global_mmlu_full_de_stem.yaml | 8 ++ .../global_mmlu_full_de_abstract_algebra.yaml | 5 ++ .../full/de/global_mmlu_full_de_anatomy.yaml | 5 ++ .../de/global_mmlu_full_de_astronomy.yaml | 5 ++ .../global_mmlu_full_de_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_de_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_de_college_biology.yaml | 5 ++ ...global_mmlu_full_de_college_chemistry.yaml | 5 ++ ...mmlu_full_de_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_de_college_mathematics.yaml | 5 ++ .../global_mmlu_full_de_college_medicine.yaml | 5 ++ .../global_mmlu_full_de_college_physics.yaml | 5 ++ ...global_mmlu_full_de_computer_security.yaml | 5 ++ ...lobal_mmlu_full_de_conceptual_physics.yaml | 5 ++ .../de/global_mmlu_full_de_econometrics.yaml | 5 ++ ...l_mmlu_full_de_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_de_elementary_mathematics.yaml | 5 ++ .../de/global_mmlu_full_de_formal_logic.yaml | 5 ++ .../de/global_mmlu_full_de_global_facts.yaml | 5 ++ ...obal_mmlu_full_de_high_school_biology.yaml | 5 ++ ...al_mmlu_full_de_high_school_chemistry.yaml | 5 ++ ..._full_de_high_school_computer_science.yaml | 5 ++ ..._full_de_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_de_high_school_geography.yaml | 5 ++ ...e_high_school_government_and_politics.yaml | 5 ++ ...lu_full_de_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_de_high_school_mathematics.yaml | 5 ++ ...lu_full_de_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_de_high_school_physics.yaml | 5 ++ ...l_mmlu_full_de_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_de_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_de_high_school_us_history.yaml | 5 ++ ...mlu_full_de_high_school_world_history.yaml | 5 ++ .../de/global_mmlu_full_de_human_aging.yaml | 5 ++ .../global_mmlu_full_de_human_sexuality.yaml | 5 ++ ...global_mmlu_full_de_international_law.yaml | 5 ++ .../de/global_mmlu_full_de_jurisprudence.yaml | 5 ++ 
...global_mmlu_full_de_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_de_machine_learning.yaml | 5 ++ .../de/global_mmlu_full_de_management.yaml | 5 ++ .../de/global_mmlu_full_de_marketing.yaml | 5 ++ .../global_mmlu_full_de_medical_genetics.yaml | 5 ++ .../de/global_mmlu_full_de_miscellaneous.yaml | 5 ++ .../global_mmlu_full_de_moral_disputes.yaml | 5 ++ .../global_mmlu_full_de_moral_scenarios.yaml | 5 ++ .../de/global_mmlu_full_de_nutrition.yaml | 5 ++ .../de/global_mmlu_full_de_philosophy.yaml | 5 ++ .../de/global_mmlu_full_de_prehistory.yaml | 5 ++ ..._mmlu_full_de_professional_accounting.yaml | 5 ++ .../global_mmlu_full_de_professional_law.yaml | 5 ++ ...al_mmlu_full_de_professional_medicine.yaml | 5 ++ ..._mmlu_full_de_professional_psychology.yaml | 5 ++ .../global_mmlu_full_de_public_relations.yaml | 5 ++ .../global_mmlu_full_de_security_studies.yaml | 5 ++ .../de/global_mmlu_full_de_sociology.yaml | 5 ++ ...global_mmlu_full_de_us_foreign_policy.yaml | 5 ++ .../full/de/global_mmlu_full_de_virology.yaml | 5 ++ .../global_mmlu_full_de_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/de/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/el/_el_template_yaml | 16 ++++ .../full/el/_global_mmlu_full_el.yaml | 11 +++ .../el/_global_mmlu_full_el_humanities.yaml | 8 ++ .../full/el/_global_mmlu_full_el_other.yaml | 8 ++ .../_global_mmlu_full_el_social_sciences.yaml | 8 ++ .../full/el/_global_mmlu_full_el_stem.yaml | 8 ++ .../global_mmlu_full_el_abstract_algebra.yaml | 5 ++ .../full/el/global_mmlu_full_el_anatomy.yaml | 5 ++ .../el/global_mmlu_full_el_astronomy.yaml | 5 ++ .../global_mmlu_full_el_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_el_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_el_college_biology.yaml | 5 ++ ...global_mmlu_full_el_college_chemistry.yaml | 5 ++ ...mmlu_full_el_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_el_college_mathematics.yaml | 5 ++ .../global_mmlu_full_el_college_medicine.yaml | 5 ++ 
.../global_mmlu_full_el_college_physics.yaml | 5 ++ ...global_mmlu_full_el_computer_security.yaml | 5 ++ ...lobal_mmlu_full_el_conceptual_physics.yaml | 5 ++ .../el/global_mmlu_full_el_econometrics.yaml | 5 ++ ...l_mmlu_full_el_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_el_elementary_mathematics.yaml | 5 ++ .../el/global_mmlu_full_el_formal_logic.yaml | 5 ++ .../el/global_mmlu_full_el_global_facts.yaml | 5 ++ ...obal_mmlu_full_el_high_school_biology.yaml | 5 ++ ...al_mmlu_full_el_high_school_chemistry.yaml | 5 ++ ..._full_el_high_school_computer_science.yaml | 5 ++ ..._full_el_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_el_high_school_geography.yaml | 5 ++ ...l_high_school_government_and_politics.yaml | 5 ++ ...lu_full_el_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_el_high_school_mathematics.yaml | 5 ++ ...lu_full_el_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_el_high_school_physics.yaml | 5 ++ ...l_mmlu_full_el_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_el_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_el_high_school_us_history.yaml | 5 ++ ...mlu_full_el_high_school_world_history.yaml | 5 ++ .../el/global_mmlu_full_el_human_aging.yaml | 5 ++ .../global_mmlu_full_el_human_sexuality.yaml | 5 ++ ...global_mmlu_full_el_international_law.yaml | 5 ++ .../el/global_mmlu_full_el_jurisprudence.yaml | 5 ++ ...global_mmlu_full_el_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_el_machine_learning.yaml | 5 ++ .../el/global_mmlu_full_el_management.yaml | 5 ++ .../el/global_mmlu_full_el_marketing.yaml | 5 ++ .../global_mmlu_full_el_medical_genetics.yaml | 5 ++ .../el/global_mmlu_full_el_miscellaneous.yaml | 5 ++ .../global_mmlu_full_el_moral_disputes.yaml | 5 ++ .../global_mmlu_full_el_moral_scenarios.yaml | 5 ++ .../el/global_mmlu_full_el_nutrition.yaml | 5 ++ .../el/global_mmlu_full_el_philosophy.yaml | 5 ++ .../el/global_mmlu_full_el_prehistory.yaml | 5 ++ ..._mmlu_full_el_professional_accounting.yaml | 5 ++ 
.../global_mmlu_full_el_professional_law.yaml | 5 ++ ...al_mmlu_full_el_professional_medicine.yaml | 5 ++ ..._mmlu_full_el_professional_psychology.yaml | 5 ++ .../global_mmlu_full_el_public_relations.yaml | 5 ++ .../global_mmlu_full_el_security_studies.yaml | 5 ++ .../el/global_mmlu_full_el_sociology.yaml | 5 ++ ...global_mmlu_full_el_us_foreign_policy.yaml | 5 ++ .../full/el/global_mmlu_full_el_virology.yaml | 5 ++ .../global_mmlu_full_el_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/el/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/en/_en_template_yaml | 16 ++++ .../full/en/_global_mmlu_full_en.yaml | 11 +++ .../en/_global_mmlu_full_en_humanities.yaml | 8 ++ .../full/en/_global_mmlu_full_en_other.yaml | 8 ++ .../_global_mmlu_full_en_social_sciences.yaml | 8 ++ .../full/en/_global_mmlu_full_en_stem.yaml | 8 ++ .../global_mmlu_full_en_abstract_algebra.yaml | 5 ++ .../full/en/global_mmlu_full_en_anatomy.yaml | 5 ++ .../en/global_mmlu_full_en_astronomy.yaml | 5 ++ .../global_mmlu_full_en_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_en_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_en_college_biology.yaml | 5 ++ ...global_mmlu_full_en_college_chemistry.yaml | 5 ++ ...mmlu_full_en_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_en_college_mathematics.yaml | 5 ++ .../global_mmlu_full_en_college_medicine.yaml | 5 ++ .../global_mmlu_full_en_college_physics.yaml | 5 ++ ...global_mmlu_full_en_computer_security.yaml | 5 ++ ...lobal_mmlu_full_en_conceptual_physics.yaml | 5 ++ .../en/global_mmlu_full_en_econometrics.yaml | 5 ++ ...l_mmlu_full_en_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_en_elementary_mathematics.yaml | 5 ++ .../en/global_mmlu_full_en_formal_logic.yaml | 5 ++ .../en/global_mmlu_full_en_global_facts.yaml | 5 ++ ...obal_mmlu_full_en_high_school_biology.yaml | 5 ++ ...al_mmlu_full_en_high_school_chemistry.yaml | 5 ++ ..._full_en_high_school_computer_science.yaml | 5 ++ ..._full_en_high_school_european_history.yaml | 5 ++ 
...al_mmlu_full_en_high_school_geography.yaml | 5 ++ ...n_high_school_government_and_politics.yaml | 5 ++ ...lu_full_en_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_en_high_school_mathematics.yaml | 5 ++ ...lu_full_en_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_en_high_school_physics.yaml | 5 ++ ...l_mmlu_full_en_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_en_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_en_high_school_us_history.yaml | 5 ++ ...mlu_full_en_high_school_world_history.yaml | 5 ++ .../en/global_mmlu_full_en_human_aging.yaml | 5 ++ .../global_mmlu_full_en_human_sexuality.yaml | 5 ++ ...global_mmlu_full_en_international_law.yaml | 5 ++ .../en/global_mmlu_full_en_jurisprudence.yaml | 5 ++ ...global_mmlu_full_en_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_en_machine_learning.yaml | 5 ++ .../en/global_mmlu_full_en_management.yaml | 5 ++ .../en/global_mmlu_full_en_marketing.yaml | 5 ++ .../global_mmlu_full_en_medical_genetics.yaml | 5 ++ .../en/global_mmlu_full_en_miscellaneous.yaml | 5 ++ .../global_mmlu_full_en_moral_disputes.yaml | 5 ++ .../global_mmlu_full_en_moral_scenarios.yaml | 5 ++ .../en/global_mmlu_full_en_nutrition.yaml | 5 ++ .../en/global_mmlu_full_en_philosophy.yaml | 5 ++ .../en/global_mmlu_full_en_prehistory.yaml | 5 ++ ..._mmlu_full_en_professional_accounting.yaml | 5 ++ .../global_mmlu_full_en_professional_law.yaml | 5 ++ ...al_mmlu_full_en_professional_medicine.yaml | 5 ++ ..._mmlu_full_en_professional_psychology.yaml | 5 ++ .../global_mmlu_full_en_public_relations.yaml | 5 ++ .../global_mmlu_full_en_security_studies.yaml | 5 ++ .../en/global_mmlu_full_en_sociology.yaml | 5 ++ ...global_mmlu_full_en_us_foreign_policy.yaml | 5 ++ .../full/en/global_mmlu_full_en_virology.yaml | 5 ++ .../global_mmlu_full_en_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/en/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/es/_es_template_yaml | 16 ++++ .../full/es/_global_mmlu_full_es.yaml | 11 +++ 
.../es/_global_mmlu_full_es_humanities.yaml | 8 ++ .../full/es/_global_mmlu_full_es_other.yaml | 8 ++ .../_global_mmlu_full_es_social_sciences.yaml | 8 ++ .../full/es/_global_mmlu_full_es_stem.yaml | 8 ++ .../global_mmlu_full_es_abstract_algebra.yaml | 5 ++ .../full/es/global_mmlu_full_es_anatomy.yaml | 5 ++ .../es/global_mmlu_full_es_astronomy.yaml | 5 ++ .../global_mmlu_full_es_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_es_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_es_college_biology.yaml | 5 ++ ...global_mmlu_full_es_college_chemistry.yaml | 5 ++ ...mmlu_full_es_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_es_college_mathematics.yaml | 5 ++ .../global_mmlu_full_es_college_medicine.yaml | 5 ++ .../global_mmlu_full_es_college_physics.yaml | 5 ++ ...global_mmlu_full_es_computer_security.yaml | 5 ++ ...lobal_mmlu_full_es_conceptual_physics.yaml | 5 ++ .../es/global_mmlu_full_es_econometrics.yaml | 5 ++ ...l_mmlu_full_es_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_es_elementary_mathematics.yaml | 5 ++ .../es/global_mmlu_full_es_formal_logic.yaml | 5 ++ .../es/global_mmlu_full_es_global_facts.yaml | 5 ++ ...obal_mmlu_full_es_high_school_biology.yaml | 5 ++ ...al_mmlu_full_es_high_school_chemistry.yaml | 5 ++ ..._full_es_high_school_computer_science.yaml | 5 ++ ..._full_es_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_es_high_school_geography.yaml | 5 ++ ...s_high_school_government_and_politics.yaml | 5 ++ ...lu_full_es_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_es_high_school_mathematics.yaml | 5 ++ ...lu_full_es_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_es_high_school_physics.yaml | 5 ++ ...l_mmlu_full_es_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_es_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_es_high_school_us_history.yaml | 5 ++ ...mlu_full_es_high_school_world_history.yaml | 5 ++ .../es/global_mmlu_full_es_human_aging.yaml | 5 ++ .../global_mmlu_full_es_human_sexuality.yaml | 5 ++ 
...global_mmlu_full_es_international_law.yaml | 5 ++ .../es/global_mmlu_full_es_jurisprudence.yaml | 5 ++ ...global_mmlu_full_es_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_es_machine_learning.yaml | 5 ++ .../es/global_mmlu_full_es_management.yaml | 5 ++ .../es/global_mmlu_full_es_marketing.yaml | 5 ++ .../global_mmlu_full_es_medical_genetics.yaml | 5 ++ .../es/global_mmlu_full_es_miscellaneous.yaml | 5 ++ .../global_mmlu_full_es_moral_disputes.yaml | 5 ++ .../global_mmlu_full_es_moral_scenarios.yaml | 5 ++ .../es/global_mmlu_full_es_nutrition.yaml | 5 ++ .../es/global_mmlu_full_es_philosophy.yaml | 5 ++ .../es/global_mmlu_full_es_prehistory.yaml | 5 ++ ..._mmlu_full_es_professional_accounting.yaml | 5 ++ .../global_mmlu_full_es_professional_law.yaml | 5 ++ ...al_mmlu_full_es_professional_medicine.yaml | 5 ++ ..._mmlu_full_es_professional_psychology.yaml | 5 ++ .../global_mmlu_full_es_public_relations.yaml | 5 ++ .../global_mmlu_full_es_security_studies.yaml | 5 ++ .../es/global_mmlu_full_es_sociology.yaml | 5 ++ ...global_mmlu_full_es_us_foreign_policy.yaml | 5 ++ .../full/es/global_mmlu_full_es_virology.yaml | 5 ++ .../global_mmlu_full_es_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/es/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/fa/_fa_template_yaml | 16 ++++ .../full/fa/_global_mmlu_full_fa.yaml | 11 +++ .../fa/_global_mmlu_full_fa_humanities.yaml | 8 ++ .../full/fa/_global_mmlu_full_fa_other.yaml | 8 ++ .../_global_mmlu_full_fa_social_sciences.yaml | 8 ++ .../full/fa/_global_mmlu_full_fa_stem.yaml | 8 ++ .../global_mmlu_full_fa_abstract_algebra.yaml | 5 ++ .../full/fa/global_mmlu_full_fa_anatomy.yaml | 5 ++ .../fa/global_mmlu_full_fa_astronomy.yaml | 5 ++ .../global_mmlu_full_fa_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_fa_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_fa_college_biology.yaml | 5 ++ ...global_mmlu_full_fa_college_chemistry.yaml | 5 ++ ...mmlu_full_fa_college_computer_science.yaml | 5 ++ 
...obal_mmlu_full_fa_college_mathematics.yaml | 5 ++ .../global_mmlu_full_fa_college_medicine.yaml | 5 ++ .../global_mmlu_full_fa_college_physics.yaml | 5 ++ ...global_mmlu_full_fa_computer_security.yaml | 5 ++ ...lobal_mmlu_full_fa_conceptual_physics.yaml | 5 ++ .../fa/global_mmlu_full_fa_econometrics.yaml | 5 ++ ...l_mmlu_full_fa_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_fa_elementary_mathematics.yaml | 5 ++ .../fa/global_mmlu_full_fa_formal_logic.yaml | 5 ++ .../fa/global_mmlu_full_fa_global_facts.yaml | 5 ++ ...obal_mmlu_full_fa_high_school_biology.yaml | 5 ++ ...al_mmlu_full_fa_high_school_chemistry.yaml | 5 ++ ..._full_fa_high_school_computer_science.yaml | 5 ++ ..._full_fa_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_fa_high_school_geography.yaml | 5 ++ ...a_high_school_government_and_politics.yaml | 5 ++ ...lu_full_fa_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_fa_high_school_mathematics.yaml | 5 ++ ...lu_full_fa_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_fa_high_school_physics.yaml | 5 ++ ...l_mmlu_full_fa_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_fa_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_fa_high_school_us_history.yaml | 5 ++ ...mlu_full_fa_high_school_world_history.yaml | 5 ++ .../fa/global_mmlu_full_fa_human_aging.yaml | 5 ++ .../global_mmlu_full_fa_human_sexuality.yaml | 5 ++ ...global_mmlu_full_fa_international_law.yaml | 5 ++ .../fa/global_mmlu_full_fa_jurisprudence.yaml | 5 ++ ...global_mmlu_full_fa_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_fa_machine_learning.yaml | 5 ++ .../fa/global_mmlu_full_fa_management.yaml | 5 ++ .../fa/global_mmlu_full_fa_marketing.yaml | 5 ++ .../global_mmlu_full_fa_medical_genetics.yaml | 5 ++ .../fa/global_mmlu_full_fa_miscellaneous.yaml | 5 ++ .../global_mmlu_full_fa_moral_disputes.yaml | 5 ++ .../global_mmlu_full_fa_moral_scenarios.yaml | 5 ++ .../fa/global_mmlu_full_fa_nutrition.yaml | 5 ++ .../fa/global_mmlu_full_fa_philosophy.yaml | 5 ++ 
.../fa/global_mmlu_full_fa_prehistory.yaml | 5 ++ ..._mmlu_full_fa_professional_accounting.yaml | 5 ++ .../global_mmlu_full_fa_professional_law.yaml | 5 ++ ...al_mmlu_full_fa_professional_medicine.yaml | 5 ++ ..._mmlu_full_fa_professional_psychology.yaml | 5 ++ .../global_mmlu_full_fa_public_relations.yaml | 5 ++ .../global_mmlu_full_fa_security_studies.yaml | 5 ++ .../fa/global_mmlu_full_fa_sociology.yaml | 5 ++ ...global_mmlu_full_fa_us_foreign_policy.yaml | 5 ++ .../full/fa/global_mmlu_full_fa_virology.yaml | 5 ++ .../global_mmlu_full_fa_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/fa/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/fil/_fil_template_yaml | 16 ++++ .../full/fil/_global_mmlu_full_fil.yaml | 11 +++ .../fil/_global_mmlu_full_fil_humanities.yaml | 8 ++ .../full/fil/_global_mmlu_full_fil_other.yaml | 8 ++ ..._global_mmlu_full_fil_social_sciences.yaml | 8 ++ .../full/fil/_global_mmlu_full_fil_stem.yaml | 8 ++ ...global_mmlu_full_fil_abstract_algebra.yaml | 5 ++ .../fil/global_mmlu_full_fil_anatomy.yaml | 5 ++ .../fil/global_mmlu_full_fil_astronomy.yaml | 5 ++ .../global_mmlu_full_fil_business_ethics.yaml | 5 ++ ...obal_mmlu_full_fil_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_fil_college_biology.yaml | 5 ++ ...lobal_mmlu_full_fil_college_chemistry.yaml | 5 ++ ...mlu_full_fil_college_computer_science.yaml | 5 ++ ...bal_mmlu_full_fil_college_mathematics.yaml | 5 ++ ...global_mmlu_full_fil_college_medicine.yaml | 5 ++ .../global_mmlu_full_fil_college_physics.yaml | 5 ++ ...lobal_mmlu_full_fil_computer_security.yaml | 5 ++ ...obal_mmlu_full_fil_conceptual_physics.yaml | 5 ++ .../global_mmlu_full_fil_econometrics.yaml | 5 ++ ..._mmlu_full_fil_electrical_engineering.yaml | 5 ++ ..._mmlu_full_fil_elementary_mathematics.yaml | 5 ++ .../global_mmlu_full_fil_formal_logic.yaml | 5 ++ .../global_mmlu_full_fil_global_facts.yaml | 5 ++ ...bal_mmlu_full_fil_high_school_biology.yaml | 5 ++ ...l_mmlu_full_fil_high_school_chemistry.yaml | 5 
++ ...full_fil_high_school_computer_science.yaml | 5 ++ ...full_fil_high_school_european_history.yaml | 5 ++ ...l_mmlu_full_fil_high_school_geography.yaml | 5 ++ ...l_high_school_government_and_politics.yaml | 5 ++ ...u_full_fil_high_school_macroeconomics.yaml | 5 ++ ...mmlu_full_fil_high_school_mathematics.yaml | 5 ++ ...u_full_fil_high_school_microeconomics.yaml | 5 ++ ...bal_mmlu_full_fil_high_school_physics.yaml | 5 ++ ..._mmlu_full_fil_high_school_psychology.yaml | 5 ++ ..._mmlu_full_fil_high_school_statistics.yaml | 5 ++ ..._mmlu_full_fil_high_school_us_history.yaml | 5 ++ ...lu_full_fil_high_school_world_history.yaml | 5 ++ .../fil/global_mmlu_full_fil_human_aging.yaml | 5 ++ .../global_mmlu_full_fil_human_sexuality.yaml | 5 ++ ...lobal_mmlu_full_fil_international_law.yaml | 5 ++ .../global_mmlu_full_fil_jurisprudence.yaml | 5 ++ ...lobal_mmlu_full_fil_logical_fallacies.yaml | 5 ++ ...global_mmlu_full_fil_machine_learning.yaml | 5 ++ .../fil/global_mmlu_full_fil_management.yaml | 5 ++ .../fil/global_mmlu_full_fil_marketing.yaml | 5 ++ ...global_mmlu_full_fil_medical_genetics.yaml | 5 ++ .../global_mmlu_full_fil_miscellaneous.yaml | 5 ++ .../global_mmlu_full_fil_moral_disputes.yaml | 5 ++ .../global_mmlu_full_fil_moral_scenarios.yaml | 5 ++ .../fil/global_mmlu_full_fil_nutrition.yaml | 5 ++ .../fil/global_mmlu_full_fil_philosophy.yaml | 5 ++ .../fil/global_mmlu_full_fil_prehistory.yaml | 5 ++ ...mmlu_full_fil_professional_accounting.yaml | 5 ++ ...global_mmlu_full_fil_professional_law.yaml | 5 ++ ...l_mmlu_full_fil_professional_medicine.yaml | 5 ++ ...mmlu_full_fil_professional_psychology.yaml | 5 ++ ...global_mmlu_full_fil_public_relations.yaml | 5 ++ ...global_mmlu_full_fil_security_studies.yaml | 5 ++ .../fil/global_mmlu_full_fil_sociology.yaml | 5 ++ ...lobal_mmlu_full_fil_us_foreign_policy.yaml | 5 ++ .../fil/global_mmlu_full_fil_virology.yaml | 5 ++ .../global_mmlu_full_fil_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/fil/utils.py | 73 
+++++++++++++++++++ .../global_mmlu/full/fr/_fr_template_yaml | 16 ++++ .../full/fr/_global_mmlu_full_fr.yaml | 11 +++ .../fr/_global_mmlu_full_fr_humanities.yaml | 8 ++ .../full/fr/_global_mmlu_full_fr_other.yaml | 8 ++ .../_global_mmlu_full_fr_social_sciences.yaml | 8 ++ .../full/fr/_global_mmlu_full_fr_stem.yaml | 8 ++ .../global_mmlu_full_fr_abstract_algebra.yaml | 5 ++ .../full/fr/global_mmlu_full_fr_anatomy.yaml | 5 ++ .../fr/global_mmlu_full_fr_astronomy.yaml | 5 ++ .../global_mmlu_full_fr_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_fr_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_fr_college_biology.yaml | 5 ++ ...global_mmlu_full_fr_college_chemistry.yaml | 5 ++ ...mmlu_full_fr_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_fr_college_mathematics.yaml | 5 ++ .../global_mmlu_full_fr_college_medicine.yaml | 5 ++ .../global_mmlu_full_fr_college_physics.yaml | 5 ++ ...global_mmlu_full_fr_computer_security.yaml | 5 ++ ...lobal_mmlu_full_fr_conceptual_physics.yaml | 5 ++ .../fr/global_mmlu_full_fr_econometrics.yaml | 5 ++ ...l_mmlu_full_fr_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_fr_elementary_mathematics.yaml | 5 ++ .../fr/global_mmlu_full_fr_formal_logic.yaml | 5 ++ .../fr/global_mmlu_full_fr_global_facts.yaml | 5 ++ ...obal_mmlu_full_fr_high_school_biology.yaml | 5 ++ ...al_mmlu_full_fr_high_school_chemistry.yaml | 5 ++ ..._full_fr_high_school_computer_science.yaml | 5 ++ ..._full_fr_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_fr_high_school_geography.yaml | 5 ++ ...r_high_school_government_and_politics.yaml | 5 ++ ...lu_full_fr_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_fr_high_school_mathematics.yaml | 5 ++ ...lu_full_fr_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_fr_high_school_physics.yaml | 5 ++ ...l_mmlu_full_fr_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_fr_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_fr_high_school_us_history.yaml | 5 ++ 
...mlu_full_fr_high_school_world_history.yaml | 5 ++ .../fr/global_mmlu_full_fr_human_aging.yaml | 5 ++ .../global_mmlu_full_fr_human_sexuality.yaml | 5 ++ ...global_mmlu_full_fr_international_law.yaml | 5 ++ .../fr/global_mmlu_full_fr_jurisprudence.yaml | 5 ++ ...global_mmlu_full_fr_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_fr_machine_learning.yaml | 5 ++ .../fr/global_mmlu_full_fr_management.yaml | 5 ++ .../fr/global_mmlu_full_fr_marketing.yaml | 5 ++ .../global_mmlu_full_fr_medical_genetics.yaml | 5 ++ .../fr/global_mmlu_full_fr_miscellaneous.yaml | 5 ++ .../global_mmlu_full_fr_moral_disputes.yaml | 5 ++ .../global_mmlu_full_fr_moral_scenarios.yaml | 5 ++ .../fr/global_mmlu_full_fr_nutrition.yaml | 5 ++ .../fr/global_mmlu_full_fr_philosophy.yaml | 5 ++ .../fr/global_mmlu_full_fr_prehistory.yaml | 5 ++ ..._mmlu_full_fr_professional_accounting.yaml | 5 ++ .../global_mmlu_full_fr_professional_law.yaml | 5 ++ ...al_mmlu_full_fr_professional_medicine.yaml | 5 ++ ..._mmlu_full_fr_professional_psychology.yaml | 5 ++ .../global_mmlu_full_fr_public_relations.yaml | 5 ++ .../global_mmlu_full_fr_security_studies.yaml | 5 ++ .../fr/global_mmlu_full_fr_sociology.yaml | 5 ++ ...global_mmlu_full_fr_us_foreign_policy.yaml | 5 ++ .../full/fr/global_mmlu_full_fr_virology.yaml | 5 ++ .../global_mmlu_full_fr_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/fr/utils.py | 73 +++++++++++++++++++ .../full/ha/_global_mmlu_full_ha.yaml | 11 +++ .../ha/_global_mmlu_full_ha_humanities.yaml | 8 ++ .../full/ha/_global_mmlu_full_ha_other.yaml | 8 ++ .../_global_mmlu_full_ha_social_sciences.yaml | 8 ++ .../full/ha/_global_mmlu_full_ha_stem.yaml | 8 ++ .../global_mmlu/full/ha/_ha_template_yaml | 16 ++++ .../global_mmlu_full_ha_abstract_algebra.yaml | 5 ++ .../full/ha/global_mmlu_full_ha_anatomy.yaml | 5 ++ .../ha/global_mmlu_full_ha_astronomy.yaml | 5 ++ .../global_mmlu_full_ha_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ha_clinical_knowledge.yaml | 5 ++ 
.../global_mmlu_full_ha_college_biology.yaml | 5 ++ ...global_mmlu_full_ha_college_chemistry.yaml | 5 ++ ...mmlu_full_ha_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ha_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ha_college_medicine.yaml | 5 ++ .../global_mmlu_full_ha_college_physics.yaml | 5 ++ ...global_mmlu_full_ha_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ha_conceptual_physics.yaml | 5 ++ .../ha/global_mmlu_full_ha_econometrics.yaml | 5 ++ ...l_mmlu_full_ha_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ha_elementary_mathematics.yaml | 5 ++ .../ha/global_mmlu_full_ha_formal_logic.yaml | 5 ++ .../ha/global_mmlu_full_ha_global_facts.yaml | 5 ++ ...obal_mmlu_full_ha_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ha_high_school_chemistry.yaml | 5 ++ ..._full_ha_high_school_computer_science.yaml | 5 ++ ..._full_ha_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ha_high_school_geography.yaml | 5 ++ ...a_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ha_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ha_high_school_mathematics.yaml | 5 ++ ...lu_full_ha_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ha_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ha_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ha_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ha_high_school_us_history.yaml | 5 ++ ...mlu_full_ha_high_school_world_history.yaml | 5 ++ .../ha/global_mmlu_full_ha_human_aging.yaml | 5 ++ .../global_mmlu_full_ha_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ha_international_law.yaml | 5 ++ .../ha/global_mmlu_full_ha_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ha_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ha_machine_learning.yaml | 5 ++ .../ha/global_mmlu_full_ha_management.yaml | 5 ++ .../ha/global_mmlu_full_ha_marketing.yaml | 5 ++ .../global_mmlu_full_ha_medical_genetics.yaml | 5 ++ .../ha/global_mmlu_full_ha_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ha_moral_disputes.yaml | 5 ++ 
.../global_mmlu_full_ha_moral_scenarios.yaml | 5 ++ .../ha/global_mmlu_full_ha_nutrition.yaml | 5 ++ .../ha/global_mmlu_full_ha_philosophy.yaml | 5 ++ .../ha/global_mmlu_full_ha_prehistory.yaml | 5 ++ ..._mmlu_full_ha_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ha_professional_law.yaml | 5 ++ ...al_mmlu_full_ha_professional_medicine.yaml | 5 ++ ..._mmlu_full_ha_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ha_public_relations.yaml | 5 ++ .../global_mmlu_full_ha_security_studies.yaml | 5 ++ .../ha/global_mmlu_full_ha_sociology.yaml | 5 ++ ...global_mmlu_full_ha_us_foreign_policy.yaml | 5 ++ .../full/ha/global_mmlu_full_ha_virology.yaml | 5 ++ .../global_mmlu_full_ha_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ha/utils.py | 73 +++++++++++++++++++ .../full/he/_global_mmlu_full_he.yaml | 11 +++ .../he/_global_mmlu_full_he_humanities.yaml | 8 ++ .../full/he/_global_mmlu_full_he_other.yaml | 8 ++ .../_global_mmlu_full_he_social_sciences.yaml | 8 ++ .../full/he/_global_mmlu_full_he_stem.yaml | 8 ++ .../global_mmlu/full/he/_he_template_yaml | 16 ++++ .../global_mmlu_full_he_abstract_algebra.yaml | 5 ++ .../full/he/global_mmlu_full_he_anatomy.yaml | 5 ++ .../he/global_mmlu_full_he_astronomy.yaml | 5 ++ .../global_mmlu_full_he_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_he_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_he_college_biology.yaml | 5 ++ ...global_mmlu_full_he_college_chemistry.yaml | 5 ++ ...mmlu_full_he_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_he_college_mathematics.yaml | 5 ++ .../global_mmlu_full_he_college_medicine.yaml | 5 ++ .../global_mmlu_full_he_college_physics.yaml | 5 ++ ...global_mmlu_full_he_computer_security.yaml | 5 ++ ...lobal_mmlu_full_he_conceptual_physics.yaml | 5 ++ .../he/global_mmlu_full_he_econometrics.yaml | 5 ++ ...l_mmlu_full_he_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_he_elementary_mathematics.yaml | 5 ++ .../he/global_mmlu_full_he_formal_logic.yaml | 5 ++ 
.../he/global_mmlu_full_he_global_facts.yaml | 5 ++ ...obal_mmlu_full_he_high_school_biology.yaml | 5 ++ ...al_mmlu_full_he_high_school_chemistry.yaml | 5 ++ ..._full_he_high_school_computer_science.yaml | 5 ++ ..._full_he_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_he_high_school_geography.yaml | 5 ++ ...e_high_school_government_and_politics.yaml | 5 ++ ...lu_full_he_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_he_high_school_mathematics.yaml | 5 ++ ...lu_full_he_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_he_high_school_physics.yaml | 5 ++ ...l_mmlu_full_he_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_he_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_he_high_school_us_history.yaml | 5 ++ ...mlu_full_he_high_school_world_history.yaml | 5 ++ .../he/global_mmlu_full_he_human_aging.yaml | 5 ++ .../global_mmlu_full_he_human_sexuality.yaml | 5 ++ ...global_mmlu_full_he_international_law.yaml | 5 ++ .../he/global_mmlu_full_he_jurisprudence.yaml | 5 ++ ...global_mmlu_full_he_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_he_machine_learning.yaml | 5 ++ .../he/global_mmlu_full_he_management.yaml | 5 ++ .../he/global_mmlu_full_he_marketing.yaml | 5 ++ .../global_mmlu_full_he_medical_genetics.yaml | 5 ++ .../he/global_mmlu_full_he_miscellaneous.yaml | 5 ++ .../global_mmlu_full_he_moral_disputes.yaml | 5 ++ .../global_mmlu_full_he_moral_scenarios.yaml | 5 ++ .../he/global_mmlu_full_he_nutrition.yaml | 5 ++ .../he/global_mmlu_full_he_philosophy.yaml | 5 ++ .../he/global_mmlu_full_he_prehistory.yaml | 5 ++ ..._mmlu_full_he_professional_accounting.yaml | 5 ++ .../global_mmlu_full_he_professional_law.yaml | 5 ++ ...al_mmlu_full_he_professional_medicine.yaml | 5 ++ ..._mmlu_full_he_professional_psychology.yaml | 5 ++ .../global_mmlu_full_he_public_relations.yaml | 5 ++ .../global_mmlu_full_he_security_studies.yaml | 5 ++ .../he/global_mmlu_full_he_sociology.yaml | 5 ++ ...global_mmlu_full_he_us_foreign_policy.yaml | 5 ++ 
.../full/he/global_mmlu_full_he_virology.yaml | 5 ++ .../global_mmlu_full_he_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/he/utils.py | 73 +++++++++++++++++++ .../full/hi/_global_mmlu_full_hi.yaml | 11 +++ .../hi/_global_mmlu_full_hi_humanities.yaml | 8 ++ .../full/hi/_global_mmlu_full_hi_other.yaml | 8 ++ .../_global_mmlu_full_hi_social_sciences.yaml | 8 ++ .../full/hi/_global_mmlu_full_hi_stem.yaml | 8 ++ .../global_mmlu/full/hi/_hi_template_yaml | 16 ++++ .../global_mmlu_full_hi_abstract_algebra.yaml | 5 ++ .../full/hi/global_mmlu_full_hi_anatomy.yaml | 5 ++ .../hi/global_mmlu_full_hi_astronomy.yaml | 5 ++ .../global_mmlu_full_hi_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_hi_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_hi_college_biology.yaml | 5 ++ ...global_mmlu_full_hi_college_chemistry.yaml | 5 ++ ...mmlu_full_hi_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_hi_college_mathematics.yaml | 5 ++ .../global_mmlu_full_hi_college_medicine.yaml | 5 ++ .../global_mmlu_full_hi_college_physics.yaml | 5 ++ ...global_mmlu_full_hi_computer_security.yaml | 5 ++ ...lobal_mmlu_full_hi_conceptual_physics.yaml | 5 ++ .../hi/global_mmlu_full_hi_econometrics.yaml | 5 ++ ...l_mmlu_full_hi_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_hi_elementary_mathematics.yaml | 5 ++ .../hi/global_mmlu_full_hi_formal_logic.yaml | 5 ++ .../hi/global_mmlu_full_hi_global_facts.yaml | 5 ++ ...obal_mmlu_full_hi_high_school_biology.yaml | 5 ++ ...al_mmlu_full_hi_high_school_chemistry.yaml | 5 ++ ..._full_hi_high_school_computer_science.yaml | 5 ++ ..._full_hi_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_hi_high_school_geography.yaml | 5 ++ ...i_high_school_government_and_politics.yaml | 5 ++ ...lu_full_hi_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_hi_high_school_mathematics.yaml | 5 ++ ...lu_full_hi_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_hi_high_school_physics.yaml | 5 ++ ...l_mmlu_full_hi_high_school_psychology.yaml | 5 
++ ...l_mmlu_full_hi_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_hi_high_school_us_history.yaml | 5 ++ ...mlu_full_hi_high_school_world_history.yaml | 5 ++ .../hi/global_mmlu_full_hi_human_aging.yaml | 5 ++ .../global_mmlu_full_hi_human_sexuality.yaml | 5 ++ ...global_mmlu_full_hi_international_law.yaml | 5 ++ .../hi/global_mmlu_full_hi_jurisprudence.yaml | 5 ++ ...global_mmlu_full_hi_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_hi_machine_learning.yaml | 5 ++ .../hi/global_mmlu_full_hi_management.yaml | 5 ++ .../hi/global_mmlu_full_hi_marketing.yaml | 5 ++ .../global_mmlu_full_hi_medical_genetics.yaml | 5 ++ .../hi/global_mmlu_full_hi_miscellaneous.yaml | 5 ++ .../global_mmlu_full_hi_moral_disputes.yaml | 5 ++ .../global_mmlu_full_hi_moral_scenarios.yaml | 5 ++ .../hi/global_mmlu_full_hi_nutrition.yaml | 5 ++ .../hi/global_mmlu_full_hi_philosophy.yaml | 5 ++ .../hi/global_mmlu_full_hi_prehistory.yaml | 5 ++ ..._mmlu_full_hi_professional_accounting.yaml | 5 ++ .../global_mmlu_full_hi_professional_law.yaml | 5 ++ ...al_mmlu_full_hi_professional_medicine.yaml | 5 ++ ..._mmlu_full_hi_professional_psychology.yaml | 5 ++ .../global_mmlu_full_hi_public_relations.yaml | 5 ++ .../global_mmlu_full_hi_security_studies.yaml | 5 ++ .../hi/global_mmlu_full_hi_sociology.yaml | 5 ++ ...global_mmlu_full_hi_us_foreign_policy.yaml | 5 ++ .../full/hi/global_mmlu_full_hi_virology.yaml | 5 ++ .../global_mmlu_full_hi_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/hi/utils.py | 73 +++++++++++++++++++ .../full/id/_global_mmlu_full_id.yaml | 11 +++ .../id/_global_mmlu_full_id_humanities.yaml | 8 ++ .../full/id/_global_mmlu_full_id_other.yaml | 8 ++ .../_global_mmlu_full_id_social_sciences.yaml | 8 ++ .../full/id/_global_mmlu_full_id_stem.yaml | 8 ++ .../global_mmlu/full/id/_id_template_yaml | 16 ++++ .../global_mmlu_full_id_abstract_algebra.yaml | 5 ++ .../full/id/global_mmlu_full_id_anatomy.yaml | 5 ++ .../id/global_mmlu_full_id_astronomy.yaml | 5 ++ 
.../global_mmlu_full_id_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_id_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_id_college_biology.yaml | 5 ++ ...global_mmlu_full_id_college_chemistry.yaml | 5 ++ ...mmlu_full_id_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_id_college_mathematics.yaml | 5 ++ .../global_mmlu_full_id_college_medicine.yaml | 5 ++ .../global_mmlu_full_id_college_physics.yaml | 5 ++ ...global_mmlu_full_id_computer_security.yaml | 5 ++ ...lobal_mmlu_full_id_conceptual_physics.yaml | 5 ++ .../id/global_mmlu_full_id_econometrics.yaml | 5 ++ ...l_mmlu_full_id_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_id_elementary_mathematics.yaml | 5 ++ .../id/global_mmlu_full_id_formal_logic.yaml | 5 ++ .../id/global_mmlu_full_id_global_facts.yaml | 5 ++ ...obal_mmlu_full_id_high_school_biology.yaml | 5 ++ ...al_mmlu_full_id_high_school_chemistry.yaml | 5 ++ ..._full_id_high_school_computer_science.yaml | 5 ++ ..._full_id_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_id_high_school_geography.yaml | 5 ++ ...d_high_school_government_and_politics.yaml | 5 ++ ...lu_full_id_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_id_high_school_mathematics.yaml | 5 ++ ...lu_full_id_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_id_high_school_physics.yaml | 5 ++ ...l_mmlu_full_id_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_id_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_id_high_school_us_history.yaml | 5 ++ ...mlu_full_id_high_school_world_history.yaml | 5 ++ .../id/global_mmlu_full_id_human_aging.yaml | 5 ++ .../global_mmlu_full_id_human_sexuality.yaml | 5 ++ ...global_mmlu_full_id_international_law.yaml | 5 ++ .../id/global_mmlu_full_id_jurisprudence.yaml | 5 ++ ...global_mmlu_full_id_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_id_machine_learning.yaml | 5 ++ .../id/global_mmlu_full_id_management.yaml | 5 ++ .../id/global_mmlu_full_id_marketing.yaml | 5 ++ .../global_mmlu_full_id_medical_genetics.yaml | 5 ++ 
.../id/global_mmlu_full_id_miscellaneous.yaml | 5 ++ .../global_mmlu_full_id_moral_disputes.yaml | 5 ++ .../global_mmlu_full_id_moral_scenarios.yaml | 5 ++ .../id/global_mmlu_full_id_nutrition.yaml | 5 ++ .../id/global_mmlu_full_id_philosophy.yaml | 5 ++ .../id/global_mmlu_full_id_prehistory.yaml | 5 ++ ..._mmlu_full_id_professional_accounting.yaml | 5 ++ .../global_mmlu_full_id_professional_law.yaml | 5 ++ ...al_mmlu_full_id_professional_medicine.yaml | 5 ++ ..._mmlu_full_id_professional_psychology.yaml | 5 ++ .../global_mmlu_full_id_public_relations.yaml | 5 ++ .../global_mmlu_full_id_security_studies.yaml | 5 ++ .../id/global_mmlu_full_id_sociology.yaml | 5 ++ ...global_mmlu_full_id_us_foreign_policy.yaml | 5 ++ .../full/id/global_mmlu_full_id_virology.yaml | 5 ++ .../global_mmlu_full_id_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/id/utils.py | 73 +++++++++++++++++++ .../full/ig/_global_mmlu_full_ig.yaml | 11 +++ .../ig/_global_mmlu_full_ig_humanities.yaml | 8 ++ .../full/ig/_global_mmlu_full_ig_other.yaml | 8 ++ .../_global_mmlu_full_ig_social_sciences.yaml | 8 ++ .../full/ig/_global_mmlu_full_ig_stem.yaml | 8 ++ .../global_mmlu/full/ig/_ig_template_yaml | 16 ++++ .../global_mmlu_full_ig_abstract_algebra.yaml | 5 ++ .../full/ig/global_mmlu_full_ig_anatomy.yaml | 5 ++ .../ig/global_mmlu_full_ig_astronomy.yaml | 5 ++ .../global_mmlu_full_ig_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ig_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ig_college_biology.yaml | 5 ++ ...global_mmlu_full_ig_college_chemistry.yaml | 5 ++ ...mmlu_full_ig_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ig_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ig_college_medicine.yaml | 5 ++ .../global_mmlu_full_ig_college_physics.yaml | 5 ++ ...global_mmlu_full_ig_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ig_conceptual_physics.yaml | 5 ++ .../ig/global_mmlu_full_ig_econometrics.yaml | 5 ++ ...l_mmlu_full_ig_electrical_engineering.yaml | 5 ++ 
...l_mmlu_full_ig_elementary_mathematics.yaml | 5 ++ .../ig/global_mmlu_full_ig_formal_logic.yaml | 5 ++ .../ig/global_mmlu_full_ig_global_facts.yaml | 5 ++ ...obal_mmlu_full_ig_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ig_high_school_chemistry.yaml | 5 ++ ..._full_ig_high_school_computer_science.yaml | 5 ++ ..._full_ig_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ig_high_school_geography.yaml | 5 ++ ...g_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ig_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ig_high_school_mathematics.yaml | 5 ++ ...lu_full_ig_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ig_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ig_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ig_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ig_high_school_us_history.yaml | 5 ++ ...mlu_full_ig_high_school_world_history.yaml | 5 ++ .../ig/global_mmlu_full_ig_human_aging.yaml | 5 ++ .../global_mmlu_full_ig_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ig_international_law.yaml | 5 ++ .../ig/global_mmlu_full_ig_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ig_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ig_machine_learning.yaml | 5 ++ .../ig/global_mmlu_full_ig_management.yaml | 5 ++ .../ig/global_mmlu_full_ig_marketing.yaml | 5 ++ .../global_mmlu_full_ig_medical_genetics.yaml | 5 ++ .../ig/global_mmlu_full_ig_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ig_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ig_moral_scenarios.yaml | 5 ++ .../ig/global_mmlu_full_ig_nutrition.yaml | 5 ++ .../ig/global_mmlu_full_ig_philosophy.yaml | 5 ++ .../ig/global_mmlu_full_ig_prehistory.yaml | 5 ++ ..._mmlu_full_ig_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ig_professional_law.yaml | 5 ++ ...al_mmlu_full_ig_professional_medicine.yaml | 5 ++ ..._mmlu_full_ig_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ig_public_relations.yaml | 5 ++ .../global_mmlu_full_ig_security_studies.yaml | 5 ++ 
.../ig/global_mmlu_full_ig_sociology.yaml | 5 ++ ...global_mmlu_full_ig_us_foreign_policy.yaml | 5 ++ .../full/ig/global_mmlu_full_ig_virology.yaml | 5 ++ .../global_mmlu_full_ig_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ig/utils.py | 73 +++++++++++++++++++ .../full/it/_global_mmlu_full_it.yaml | 11 +++ .../it/_global_mmlu_full_it_humanities.yaml | 8 ++ .../full/it/_global_mmlu_full_it_other.yaml | 8 ++ .../_global_mmlu_full_it_social_sciences.yaml | 8 ++ .../full/it/_global_mmlu_full_it_stem.yaml | 8 ++ .../global_mmlu/full/it/_it_template_yaml | 16 ++++ .../global_mmlu_full_it_abstract_algebra.yaml | 5 ++ .../full/it/global_mmlu_full_it_anatomy.yaml | 5 ++ .../it/global_mmlu_full_it_astronomy.yaml | 5 ++ .../global_mmlu_full_it_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_it_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_it_college_biology.yaml | 5 ++ ...global_mmlu_full_it_college_chemistry.yaml | 5 ++ ...mmlu_full_it_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_it_college_mathematics.yaml | 5 ++ .../global_mmlu_full_it_college_medicine.yaml | 5 ++ .../global_mmlu_full_it_college_physics.yaml | 5 ++ ...global_mmlu_full_it_computer_security.yaml | 5 ++ ...lobal_mmlu_full_it_conceptual_physics.yaml | 5 ++ .../it/global_mmlu_full_it_econometrics.yaml | 5 ++ ...l_mmlu_full_it_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_it_elementary_mathematics.yaml | 5 ++ .../it/global_mmlu_full_it_formal_logic.yaml | 5 ++ .../it/global_mmlu_full_it_global_facts.yaml | 5 ++ ...obal_mmlu_full_it_high_school_biology.yaml | 5 ++ ...al_mmlu_full_it_high_school_chemistry.yaml | 5 ++ ..._full_it_high_school_computer_science.yaml | 5 ++ ..._full_it_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_it_high_school_geography.yaml | 5 ++ ...t_high_school_government_and_politics.yaml | 5 ++ ...lu_full_it_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_it_high_school_mathematics.yaml | 5 ++ ...lu_full_it_high_school_microeconomics.yaml | 5 ++ 
...obal_mmlu_full_it_high_school_physics.yaml | 5 ++ ...l_mmlu_full_it_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_it_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_it_high_school_us_history.yaml | 5 ++ ...mlu_full_it_high_school_world_history.yaml | 5 ++ .../it/global_mmlu_full_it_human_aging.yaml | 5 ++ .../global_mmlu_full_it_human_sexuality.yaml | 5 ++ ...global_mmlu_full_it_international_law.yaml | 5 ++ .../it/global_mmlu_full_it_jurisprudence.yaml | 5 ++ ...global_mmlu_full_it_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_it_machine_learning.yaml | 5 ++ .../it/global_mmlu_full_it_management.yaml | 5 ++ .../it/global_mmlu_full_it_marketing.yaml | 5 ++ .../global_mmlu_full_it_medical_genetics.yaml | 5 ++ .../it/global_mmlu_full_it_miscellaneous.yaml | 5 ++ .../global_mmlu_full_it_moral_disputes.yaml | 5 ++ .../global_mmlu_full_it_moral_scenarios.yaml | 5 ++ .../it/global_mmlu_full_it_nutrition.yaml | 5 ++ .../it/global_mmlu_full_it_philosophy.yaml | 5 ++ .../it/global_mmlu_full_it_prehistory.yaml | 5 ++ ..._mmlu_full_it_professional_accounting.yaml | 5 ++ .../global_mmlu_full_it_professional_law.yaml | 5 ++ ...al_mmlu_full_it_professional_medicine.yaml | 5 ++ ..._mmlu_full_it_professional_psychology.yaml | 5 ++ .../global_mmlu_full_it_public_relations.yaml | 5 ++ .../global_mmlu_full_it_security_studies.yaml | 5 ++ .../it/global_mmlu_full_it_sociology.yaml | 5 ++ ...global_mmlu_full_it_us_foreign_policy.yaml | 5 ++ .../full/it/global_mmlu_full_it_virology.yaml | 5 ++ .../global_mmlu_full_it_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/it/utils.py | 73 +++++++++++++++++++ .../full/ja/_global_mmlu_full_ja.yaml | 11 +++ .../ja/_global_mmlu_full_ja_humanities.yaml | 8 ++ .../full/ja/_global_mmlu_full_ja_other.yaml | 8 ++ .../_global_mmlu_full_ja_social_sciences.yaml | 8 ++ .../full/ja/_global_mmlu_full_ja_stem.yaml | 8 ++ .../global_mmlu/full/ja/_ja_template_yaml | 16 ++++ .../global_mmlu_full_ja_abstract_algebra.yaml | 5 ++ 
.../full/ja/global_mmlu_full_ja_anatomy.yaml | 5 ++ .../ja/global_mmlu_full_ja_astronomy.yaml | 5 ++ .../global_mmlu_full_ja_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ja_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ja_college_biology.yaml | 5 ++ ...global_mmlu_full_ja_college_chemistry.yaml | 5 ++ ...mmlu_full_ja_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ja_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ja_college_medicine.yaml | 5 ++ .../global_mmlu_full_ja_college_physics.yaml | 5 ++ ...global_mmlu_full_ja_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ja_conceptual_physics.yaml | 5 ++ .../ja/global_mmlu_full_ja_econometrics.yaml | 5 ++ ...l_mmlu_full_ja_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ja_elementary_mathematics.yaml | 5 ++ .../ja/global_mmlu_full_ja_formal_logic.yaml | 5 ++ .../ja/global_mmlu_full_ja_global_facts.yaml | 5 ++ ...obal_mmlu_full_ja_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ja_high_school_chemistry.yaml | 5 ++ ..._full_ja_high_school_computer_science.yaml | 5 ++ ..._full_ja_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ja_high_school_geography.yaml | 5 ++ ...a_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ja_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ja_high_school_mathematics.yaml | 5 ++ ...lu_full_ja_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ja_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ja_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ja_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ja_high_school_us_history.yaml | 5 ++ ...mlu_full_ja_high_school_world_history.yaml | 5 ++ .../ja/global_mmlu_full_ja_human_aging.yaml | 5 ++ .../global_mmlu_full_ja_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ja_international_law.yaml | 5 ++ .../ja/global_mmlu_full_ja_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ja_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ja_machine_learning.yaml | 5 ++ .../ja/global_mmlu_full_ja_management.yaml | 5 ++ 
.../ja/global_mmlu_full_ja_marketing.yaml | 5 ++ .../global_mmlu_full_ja_medical_genetics.yaml | 5 ++ .../ja/global_mmlu_full_ja_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ja_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ja_moral_scenarios.yaml | 5 ++ .../ja/global_mmlu_full_ja_nutrition.yaml | 5 ++ .../ja/global_mmlu_full_ja_philosophy.yaml | 5 ++ .../ja/global_mmlu_full_ja_prehistory.yaml | 5 ++ ..._mmlu_full_ja_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ja_professional_law.yaml | 5 ++ ...al_mmlu_full_ja_professional_medicine.yaml | 5 ++ ..._mmlu_full_ja_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ja_public_relations.yaml | 5 ++ .../global_mmlu_full_ja_security_studies.yaml | 5 ++ .../ja/global_mmlu_full_ja_sociology.yaml | 5 ++ ...global_mmlu_full_ja_us_foreign_policy.yaml | 5 ++ .../full/ja/global_mmlu_full_ja_virology.yaml | 5 ++ .../global_mmlu_full_ja_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ja/utils.py | 73 +++++++++++++++++++ .../full/ko/_global_mmlu_full_ko.yaml | 11 +++ .../ko/_global_mmlu_full_ko_humanities.yaml | 8 ++ .../full/ko/_global_mmlu_full_ko_other.yaml | 8 ++ .../_global_mmlu_full_ko_social_sciences.yaml | 8 ++ .../full/ko/_global_mmlu_full_ko_stem.yaml | 8 ++ .../global_mmlu/full/ko/_ko_template_yaml | 16 ++++ .../global_mmlu_full_ko_abstract_algebra.yaml | 5 ++ .../full/ko/global_mmlu_full_ko_anatomy.yaml | 5 ++ .../ko/global_mmlu_full_ko_astronomy.yaml | 5 ++ .../global_mmlu_full_ko_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ko_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ko_college_biology.yaml | 5 ++ ...global_mmlu_full_ko_college_chemistry.yaml | 5 ++ ...mmlu_full_ko_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ko_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ko_college_medicine.yaml | 5 ++ .../global_mmlu_full_ko_college_physics.yaml | 5 ++ ...global_mmlu_full_ko_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ko_conceptual_physics.yaml | 5 ++ 
.../ko/global_mmlu_full_ko_econometrics.yaml | 5 ++ ...l_mmlu_full_ko_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ko_elementary_mathematics.yaml | 5 ++ .../ko/global_mmlu_full_ko_formal_logic.yaml | 5 ++ .../ko/global_mmlu_full_ko_global_facts.yaml | 5 ++ ...obal_mmlu_full_ko_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ko_high_school_chemistry.yaml | 5 ++ ..._full_ko_high_school_computer_science.yaml | 5 ++ ..._full_ko_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ko_high_school_geography.yaml | 5 ++ ...o_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ko_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ko_high_school_mathematics.yaml | 5 ++ ...lu_full_ko_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ko_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ko_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ko_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ko_high_school_us_history.yaml | 5 ++ ...mlu_full_ko_high_school_world_history.yaml | 5 ++ .../ko/global_mmlu_full_ko_human_aging.yaml | 5 ++ .../global_mmlu_full_ko_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ko_international_law.yaml | 5 ++ .../ko/global_mmlu_full_ko_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ko_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ko_machine_learning.yaml | 5 ++ .../ko/global_mmlu_full_ko_management.yaml | 5 ++ .../ko/global_mmlu_full_ko_marketing.yaml | 5 ++ .../global_mmlu_full_ko_medical_genetics.yaml | 5 ++ .../ko/global_mmlu_full_ko_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ko_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ko_moral_scenarios.yaml | 5 ++ .../ko/global_mmlu_full_ko_nutrition.yaml | 5 ++ .../ko/global_mmlu_full_ko_philosophy.yaml | 5 ++ .../ko/global_mmlu_full_ko_prehistory.yaml | 5 ++ ..._mmlu_full_ko_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ko_professional_law.yaml | 5 ++ ...al_mmlu_full_ko_professional_medicine.yaml | 5 ++ ..._mmlu_full_ko_professional_psychology.yaml | 5 ++ 
.../global_mmlu_full_ko_public_relations.yaml | 5 ++ .../global_mmlu_full_ko_security_studies.yaml | 5 ++ .../ko/global_mmlu_full_ko_sociology.yaml | 5 ++ ...global_mmlu_full_ko_us_foreign_policy.yaml | 5 ++ .../full/ko/global_mmlu_full_ko_virology.yaml | 5 ++ .../global_mmlu_full_ko_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ko/utils.py | 73 +++++++++++++++++++ .../full/ky/_global_mmlu_full_ky.yaml | 11 +++ .../ky/_global_mmlu_full_ky_humanities.yaml | 8 ++ .../full/ky/_global_mmlu_full_ky_other.yaml | 8 ++ .../_global_mmlu_full_ky_social_sciences.yaml | 8 ++ .../full/ky/_global_mmlu_full_ky_stem.yaml | 8 ++ .../global_mmlu/full/ky/_ky_template_yaml | 16 ++++ .../global_mmlu_full_ky_abstract_algebra.yaml | 5 ++ .../full/ky/global_mmlu_full_ky_anatomy.yaml | 5 ++ .../ky/global_mmlu_full_ky_astronomy.yaml | 5 ++ .../global_mmlu_full_ky_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ky_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ky_college_biology.yaml | 5 ++ ...global_mmlu_full_ky_college_chemistry.yaml | 5 ++ ...mmlu_full_ky_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ky_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ky_college_medicine.yaml | 5 ++ .../global_mmlu_full_ky_college_physics.yaml | 5 ++ ...global_mmlu_full_ky_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ky_conceptual_physics.yaml | 5 ++ .../ky/global_mmlu_full_ky_econometrics.yaml | 5 ++ ...l_mmlu_full_ky_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ky_elementary_mathematics.yaml | 5 ++ .../ky/global_mmlu_full_ky_formal_logic.yaml | 5 ++ .../ky/global_mmlu_full_ky_global_facts.yaml | 5 ++ ...obal_mmlu_full_ky_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ky_high_school_chemistry.yaml | 5 ++ ..._full_ky_high_school_computer_science.yaml | 5 ++ ..._full_ky_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ky_high_school_geography.yaml | 5 ++ ...y_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ky_high_school_macroeconomics.yaml | 5 ++ 
..._mmlu_full_ky_high_school_mathematics.yaml | 5 ++ ...lu_full_ky_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ky_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ky_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ky_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ky_high_school_us_history.yaml | 5 ++ ...mlu_full_ky_high_school_world_history.yaml | 5 ++ .../ky/global_mmlu_full_ky_human_aging.yaml | 5 ++ .../global_mmlu_full_ky_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ky_international_law.yaml | 5 ++ .../ky/global_mmlu_full_ky_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ky_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ky_machine_learning.yaml | 5 ++ .../ky/global_mmlu_full_ky_management.yaml | 5 ++ .../ky/global_mmlu_full_ky_marketing.yaml | 5 ++ .../global_mmlu_full_ky_medical_genetics.yaml | 5 ++ .../ky/global_mmlu_full_ky_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ky_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ky_moral_scenarios.yaml | 5 ++ .../ky/global_mmlu_full_ky_nutrition.yaml | 5 ++ .../ky/global_mmlu_full_ky_philosophy.yaml | 5 ++ .../ky/global_mmlu_full_ky_prehistory.yaml | 5 ++ ..._mmlu_full_ky_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ky_professional_law.yaml | 5 ++ ...al_mmlu_full_ky_professional_medicine.yaml | 5 ++ ..._mmlu_full_ky_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ky_public_relations.yaml | 5 ++ .../global_mmlu_full_ky_security_studies.yaml | 5 ++ .../ky/global_mmlu_full_ky_sociology.yaml | 5 ++ ...global_mmlu_full_ky_us_foreign_policy.yaml | 5 ++ .../full/ky/global_mmlu_full_ky_virology.yaml | 5 ++ .../global_mmlu_full_ky_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ky/utils.py | 73 +++++++++++++++++++ .../full/lt/_global_mmlu_full_lt.yaml | 11 +++ .../lt/_global_mmlu_full_lt_humanities.yaml | 8 ++ .../full/lt/_global_mmlu_full_lt_other.yaml | 8 ++ .../_global_mmlu_full_lt_social_sciences.yaml | 8 ++ .../full/lt/_global_mmlu_full_lt_stem.yaml | 8 ++ 
.../global_mmlu/full/lt/_lt_template_yaml | 16 ++++ .../global_mmlu_full_lt_abstract_algebra.yaml | 5 ++ .../full/lt/global_mmlu_full_lt_anatomy.yaml | 5 ++ .../lt/global_mmlu_full_lt_astronomy.yaml | 5 ++ .../global_mmlu_full_lt_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_lt_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_lt_college_biology.yaml | 5 ++ ...global_mmlu_full_lt_college_chemistry.yaml | 5 ++ ...mmlu_full_lt_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_lt_college_mathematics.yaml | 5 ++ .../global_mmlu_full_lt_college_medicine.yaml | 5 ++ .../global_mmlu_full_lt_college_physics.yaml | 5 ++ ...global_mmlu_full_lt_computer_security.yaml | 5 ++ ...lobal_mmlu_full_lt_conceptual_physics.yaml | 5 ++ .../lt/global_mmlu_full_lt_econometrics.yaml | 5 ++ ...l_mmlu_full_lt_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_lt_elementary_mathematics.yaml | 5 ++ .../lt/global_mmlu_full_lt_formal_logic.yaml | 5 ++ .../lt/global_mmlu_full_lt_global_facts.yaml | 5 ++ ...obal_mmlu_full_lt_high_school_biology.yaml | 5 ++ ...al_mmlu_full_lt_high_school_chemistry.yaml | 5 ++ ..._full_lt_high_school_computer_science.yaml | 5 ++ ..._full_lt_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_lt_high_school_geography.yaml | 5 ++ ...t_high_school_government_and_politics.yaml | 5 ++ ...lu_full_lt_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_lt_high_school_mathematics.yaml | 5 ++ ...lu_full_lt_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_lt_high_school_physics.yaml | 5 ++ ...l_mmlu_full_lt_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_lt_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_lt_high_school_us_history.yaml | 5 ++ ...mlu_full_lt_high_school_world_history.yaml | 5 ++ .../lt/global_mmlu_full_lt_human_aging.yaml | 5 ++ .../global_mmlu_full_lt_human_sexuality.yaml | 5 ++ ...global_mmlu_full_lt_international_law.yaml | 5 ++ .../lt/global_mmlu_full_lt_jurisprudence.yaml | 5 ++ ...global_mmlu_full_lt_logical_fallacies.yaml | 5 ++ 
.../global_mmlu_full_lt_machine_learning.yaml | 5 ++ .../lt/global_mmlu_full_lt_management.yaml | 5 ++ .../lt/global_mmlu_full_lt_marketing.yaml | 5 ++ .../global_mmlu_full_lt_medical_genetics.yaml | 5 ++ .../lt/global_mmlu_full_lt_miscellaneous.yaml | 5 ++ .../global_mmlu_full_lt_moral_disputes.yaml | 5 ++ .../global_mmlu_full_lt_moral_scenarios.yaml | 5 ++ .../lt/global_mmlu_full_lt_nutrition.yaml | 5 ++ .../lt/global_mmlu_full_lt_philosophy.yaml | 5 ++ .../lt/global_mmlu_full_lt_prehistory.yaml | 5 ++ ..._mmlu_full_lt_professional_accounting.yaml | 5 ++ .../global_mmlu_full_lt_professional_law.yaml | 5 ++ ...al_mmlu_full_lt_professional_medicine.yaml | 5 ++ ..._mmlu_full_lt_professional_psychology.yaml | 5 ++ .../global_mmlu_full_lt_public_relations.yaml | 5 ++ .../global_mmlu_full_lt_security_studies.yaml | 5 ++ .../lt/global_mmlu_full_lt_sociology.yaml | 5 ++ ...global_mmlu_full_lt_us_foreign_policy.yaml | 5 ++ .../full/lt/global_mmlu_full_lt_virology.yaml | 5 ++ .../global_mmlu_full_lt_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/lt/utils.py | 73 +++++++++++++++++++ .../full/mg/_global_mmlu_full_mg.yaml | 11 +++ .../mg/_global_mmlu_full_mg_humanities.yaml | 8 ++ .../full/mg/_global_mmlu_full_mg_other.yaml | 8 ++ .../_global_mmlu_full_mg_social_sciences.yaml | 8 ++ .../full/mg/_global_mmlu_full_mg_stem.yaml | 8 ++ .../global_mmlu/full/mg/_mg_template_yaml | 16 ++++ .../global_mmlu_full_mg_abstract_algebra.yaml | 5 ++ .../full/mg/global_mmlu_full_mg_anatomy.yaml | 5 ++ .../mg/global_mmlu_full_mg_astronomy.yaml | 5 ++ .../global_mmlu_full_mg_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_mg_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_mg_college_biology.yaml | 5 ++ ...global_mmlu_full_mg_college_chemistry.yaml | 5 ++ ...mmlu_full_mg_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_mg_college_mathematics.yaml | 5 ++ .../global_mmlu_full_mg_college_medicine.yaml | 5 ++ .../global_mmlu_full_mg_college_physics.yaml | 5 ++ 
...global_mmlu_full_mg_computer_security.yaml | 5 ++ ...lobal_mmlu_full_mg_conceptual_physics.yaml | 5 ++ .../mg/global_mmlu_full_mg_econometrics.yaml | 5 ++ ...l_mmlu_full_mg_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_mg_elementary_mathematics.yaml | 5 ++ .../mg/global_mmlu_full_mg_formal_logic.yaml | 5 ++ .../mg/global_mmlu_full_mg_global_facts.yaml | 5 ++ ...obal_mmlu_full_mg_high_school_biology.yaml | 5 ++ ...al_mmlu_full_mg_high_school_chemistry.yaml | 5 ++ ..._full_mg_high_school_computer_science.yaml | 5 ++ ..._full_mg_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_mg_high_school_geography.yaml | 5 ++ ...g_high_school_government_and_politics.yaml | 5 ++ ...lu_full_mg_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_mg_high_school_mathematics.yaml | 5 ++ ...lu_full_mg_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_mg_high_school_physics.yaml | 5 ++ ...l_mmlu_full_mg_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_mg_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_mg_high_school_us_history.yaml | 5 ++ ...mlu_full_mg_high_school_world_history.yaml | 5 ++ .../mg/global_mmlu_full_mg_human_aging.yaml | 5 ++ .../global_mmlu_full_mg_human_sexuality.yaml | 5 ++ ...global_mmlu_full_mg_international_law.yaml | 5 ++ .../mg/global_mmlu_full_mg_jurisprudence.yaml | 5 ++ ...global_mmlu_full_mg_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_mg_machine_learning.yaml | 5 ++ .../mg/global_mmlu_full_mg_management.yaml | 5 ++ .../mg/global_mmlu_full_mg_marketing.yaml | 5 ++ .../global_mmlu_full_mg_medical_genetics.yaml | 5 ++ .../mg/global_mmlu_full_mg_miscellaneous.yaml | 5 ++ .../global_mmlu_full_mg_moral_disputes.yaml | 5 ++ .../global_mmlu_full_mg_moral_scenarios.yaml | 5 ++ .../mg/global_mmlu_full_mg_nutrition.yaml | 5 ++ .../mg/global_mmlu_full_mg_philosophy.yaml | 5 ++ .../mg/global_mmlu_full_mg_prehistory.yaml | 5 ++ ..._mmlu_full_mg_professional_accounting.yaml | 5 ++ .../global_mmlu_full_mg_professional_law.yaml | 5 ++ 
...al_mmlu_full_mg_professional_medicine.yaml | 5 ++ ..._mmlu_full_mg_professional_psychology.yaml | 5 ++ .../global_mmlu_full_mg_public_relations.yaml | 5 ++ .../global_mmlu_full_mg_security_studies.yaml | 5 ++ .../mg/global_mmlu_full_mg_sociology.yaml | 5 ++ ...global_mmlu_full_mg_us_foreign_policy.yaml | 5 ++ .../full/mg/global_mmlu_full_mg_virology.yaml | 5 ++ .../global_mmlu_full_mg_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/mg/utils.py | 73 +++++++++++++++++++ .../full/ms/_global_mmlu_full_ms.yaml | 11 +++ .../ms/_global_mmlu_full_ms_humanities.yaml | 8 ++ .../full/ms/_global_mmlu_full_ms_other.yaml | 8 ++ .../_global_mmlu_full_ms_social_sciences.yaml | 8 ++ .../full/ms/_global_mmlu_full_ms_stem.yaml | 8 ++ .../global_mmlu/full/ms/_ms_template_yaml | 16 ++++ .../global_mmlu_full_ms_abstract_algebra.yaml | 5 ++ .../full/ms/global_mmlu_full_ms_anatomy.yaml | 5 ++ .../ms/global_mmlu_full_ms_astronomy.yaml | 5 ++ .../global_mmlu_full_ms_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ms_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ms_college_biology.yaml | 5 ++ ...global_mmlu_full_ms_college_chemistry.yaml | 5 ++ ...mmlu_full_ms_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ms_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ms_college_medicine.yaml | 5 ++ .../global_mmlu_full_ms_college_physics.yaml | 5 ++ ...global_mmlu_full_ms_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ms_conceptual_physics.yaml | 5 ++ .../ms/global_mmlu_full_ms_econometrics.yaml | 5 ++ ...l_mmlu_full_ms_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ms_elementary_mathematics.yaml | 5 ++ .../ms/global_mmlu_full_ms_formal_logic.yaml | 5 ++ .../ms/global_mmlu_full_ms_global_facts.yaml | 5 ++ ...obal_mmlu_full_ms_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ms_high_school_chemistry.yaml | 5 ++ ..._full_ms_high_school_computer_science.yaml | 5 ++ ..._full_ms_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ms_high_school_geography.yaml | 5 ++ 
...s_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ms_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ms_high_school_mathematics.yaml | 5 ++ ...lu_full_ms_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ms_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ms_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ms_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ms_high_school_us_history.yaml | 5 ++ ...mlu_full_ms_high_school_world_history.yaml | 5 ++ .../ms/global_mmlu_full_ms_human_aging.yaml | 5 ++ .../global_mmlu_full_ms_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ms_international_law.yaml | 5 ++ .../ms/global_mmlu_full_ms_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ms_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ms_machine_learning.yaml | 5 ++ .../ms/global_mmlu_full_ms_management.yaml | 5 ++ .../ms/global_mmlu_full_ms_marketing.yaml | 5 ++ .../global_mmlu_full_ms_medical_genetics.yaml | 5 ++ .../ms/global_mmlu_full_ms_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ms_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ms_moral_scenarios.yaml | 5 ++ .../ms/global_mmlu_full_ms_nutrition.yaml | 5 ++ .../ms/global_mmlu_full_ms_philosophy.yaml | 5 ++ .../ms/global_mmlu_full_ms_prehistory.yaml | 5 ++ ..._mmlu_full_ms_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ms_professional_law.yaml | 5 ++ ...al_mmlu_full_ms_professional_medicine.yaml | 5 ++ ..._mmlu_full_ms_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ms_public_relations.yaml | 5 ++ .../global_mmlu_full_ms_security_studies.yaml | 5 ++ .../ms/global_mmlu_full_ms_sociology.yaml | 5 ++ ...global_mmlu_full_ms_us_foreign_policy.yaml | 5 ++ .../full/ms/global_mmlu_full_ms_virology.yaml | 5 ++ .../global_mmlu_full_ms_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ms/utils.py | 73 +++++++++++++++++++ .../full/ne/_global_mmlu_full_ne.yaml | 11 +++ .../ne/_global_mmlu_full_ne_humanities.yaml | 8 ++ .../full/ne/_global_mmlu_full_ne_other.yaml | 8 ++ 
.../_global_mmlu_full_ne_social_sciences.yaml | 8 ++ .../full/ne/_global_mmlu_full_ne_stem.yaml | 8 ++ .../global_mmlu/full/ne/_ne_template_yaml | 16 ++++ .../global_mmlu_full_ne_abstract_algebra.yaml | 5 ++ .../full/ne/global_mmlu_full_ne_anatomy.yaml | 5 ++ .../ne/global_mmlu_full_ne_astronomy.yaml | 5 ++ .../global_mmlu_full_ne_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ne_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ne_college_biology.yaml | 5 ++ ...global_mmlu_full_ne_college_chemistry.yaml | 5 ++ ...mmlu_full_ne_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ne_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ne_college_medicine.yaml | 5 ++ .../global_mmlu_full_ne_college_physics.yaml | 5 ++ ...global_mmlu_full_ne_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ne_conceptual_physics.yaml | 5 ++ .../ne/global_mmlu_full_ne_econometrics.yaml | 5 ++ ...l_mmlu_full_ne_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ne_elementary_mathematics.yaml | 5 ++ .../ne/global_mmlu_full_ne_formal_logic.yaml | 5 ++ .../ne/global_mmlu_full_ne_global_facts.yaml | 5 ++ ...obal_mmlu_full_ne_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ne_high_school_chemistry.yaml | 5 ++ ..._full_ne_high_school_computer_science.yaml | 5 ++ ..._full_ne_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ne_high_school_geography.yaml | 5 ++ ...e_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ne_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ne_high_school_mathematics.yaml | 5 ++ ...lu_full_ne_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ne_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ne_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ne_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ne_high_school_us_history.yaml | 5 ++ ...mlu_full_ne_high_school_world_history.yaml | 5 ++ .../ne/global_mmlu_full_ne_human_aging.yaml | 5 ++ .../global_mmlu_full_ne_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ne_international_law.yaml | 5 ++ 
.../ne/global_mmlu_full_ne_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ne_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ne_machine_learning.yaml | 5 ++ .../ne/global_mmlu_full_ne_management.yaml | 5 ++ .../ne/global_mmlu_full_ne_marketing.yaml | 5 ++ .../global_mmlu_full_ne_medical_genetics.yaml | 5 ++ .../ne/global_mmlu_full_ne_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ne_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ne_moral_scenarios.yaml | 5 ++ .../ne/global_mmlu_full_ne_nutrition.yaml | 5 ++ .../ne/global_mmlu_full_ne_philosophy.yaml | 5 ++ .../ne/global_mmlu_full_ne_prehistory.yaml | 5 ++ ..._mmlu_full_ne_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ne_professional_law.yaml | 5 ++ ...al_mmlu_full_ne_professional_medicine.yaml | 5 ++ ..._mmlu_full_ne_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ne_public_relations.yaml | 5 ++ .../global_mmlu_full_ne_security_studies.yaml | 5 ++ .../ne/global_mmlu_full_ne_sociology.yaml | 5 ++ ...global_mmlu_full_ne_us_foreign_policy.yaml | 5 ++ .../full/ne/global_mmlu_full_ne_virology.yaml | 5 ++ .../global_mmlu_full_ne_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ne/utils.py | 73 +++++++++++++++++++ .../full/nl/_global_mmlu_full_nl.yaml | 11 +++ .../nl/_global_mmlu_full_nl_humanities.yaml | 8 ++ .../full/nl/_global_mmlu_full_nl_other.yaml | 8 ++ .../_global_mmlu_full_nl_social_sciences.yaml | 8 ++ .../full/nl/_global_mmlu_full_nl_stem.yaml | 8 ++ .../global_mmlu/full/nl/_nl_template_yaml | 16 ++++ .../global_mmlu_full_nl_abstract_algebra.yaml | 5 ++ .../full/nl/global_mmlu_full_nl_anatomy.yaml | 5 ++ .../nl/global_mmlu_full_nl_astronomy.yaml | 5 ++ .../global_mmlu_full_nl_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_nl_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_nl_college_biology.yaml | 5 ++ ...global_mmlu_full_nl_college_chemistry.yaml | 5 ++ ...mmlu_full_nl_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_nl_college_mathematics.yaml | 5 ++ 
.../global_mmlu_full_nl_college_medicine.yaml | 5 ++ .../global_mmlu_full_nl_college_physics.yaml | 5 ++ ...global_mmlu_full_nl_computer_security.yaml | 5 ++ ...lobal_mmlu_full_nl_conceptual_physics.yaml | 5 ++ .../nl/global_mmlu_full_nl_econometrics.yaml | 5 ++ ...l_mmlu_full_nl_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_nl_elementary_mathematics.yaml | 5 ++ .../nl/global_mmlu_full_nl_formal_logic.yaml | 5 ++ .../nl/global_mmlu_full_nl_global_facts.yaml | 5 ++ ...obal_mmlu_full_nl_high_school_biology.yaml | 5 ++ ...al_mmlu_full_nl_high_school_chemistry.yaml | 5 ++ ..._full_nl_high_school_computer_science.yaml | 5 ++ ..._full_nl_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_nl_high_school_geography.yaml | 5 ++ ...l_high_school_government_and_politics.yaml | 5 ++ ...lu_full_nl_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_nl_high_school_mathematics.yaml | 5 ++ ...lu_full_nl_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_nl_high_school_physics.yaml | 5 ++ ...l_mmlu_full_nl_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_nl_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_nl_high_school_us_history.yaml | 5 ++ ...mlu_full_nl_high_school_world_history.yaml | 5 ++ .../nl/global_mmlu_full_nl_human_aging.yaml | 5 ++ .../global_mmlu_full_nl_human_sexuality.yaml | 5 ++ ...global_mmlu_full_nl_international_law.yaml | 5 ++ .../nl/global_mmlu_full_nl_jurisprudence.yaml | 5 ++ ...global_mmlu_full_nl_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_nl_machine_learning.yaml | 5 ++ .../nl/global_mmlu_full_nl_management.yaml | 5 ++ .../nl/global_mmlu_full_nl_marketing.yaml | 5 ++ .../global_mmlu_full_nl_medical_genetics.yaml | 5 ++ .../nl/global_mmlu_full_nl_miscellaneous.yaml | 5 ++ .../global_mmlu_full_nl_moral_disputes.yaml | 5 ++ .../global_mmlu_full_nl_moral_scenarios.yaml | 5 ++ .../nl/global_mmlu_full_nl_nutrition.yaml | 5 ++ .../nl/global_mmlu_full_nl_philosophy.yaml | 5 ++ .../nl/global_mmlu_full_nl_prehistory.yaml | 5 ++ 
..._mmlu_full_nl_professional_accounting.yaml | 5 ++ .../global_mmlu_full_nl_professional_law.yaml | 5 ++ ...al_mmlu_full_nl_professional_medicine.yaml | 5 ++ ..._mmlu_full_nl_professional_psychology.yaml | 5 ++ .../global_mmlu_full_nl_public_relations.yaml | 5 ++ .../global_mmlu_full_nl_security_studies.yaml | 5 ++ .../nl/global_mmlu_full_nl_sociology.yaml | 5 ++ ...global_mmlu_full_nl_us_foreign_policy.yaml | 5 ++ .../full/nl/global_mmlu_full_nl_virology.yaml | 5 ++ .../global_mmlu_full_nl_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/nl/utils.py | 73 +++++++++++++++++++ .../full/ny/_global_mmlu_full_ny.yaml | 11 +++ .../ny/_global_mmlu_full_ny_humanities.yaml | 8 ++ .../full/ny/_global_mmlu_full_ny_other.yaml | 8 ++ .../_global_mmlu_full_ny_social_sciences.yaml | 8 ++ .../full/ny/_global_mmlu_full_ny_stem.yaml | 8 ++ .../global_mmlu/full/ny/_ny_template_yaml | 16 ++++ .../global_mmlu_full_ny_abstract_algebra.yaml | 5 ++ .../full/ny/global_mmlu_full_ny_anatomy.yaml | 5 ++ .../ny/global_mmlu_full_ny_astronomy.yaml | 5 ++ .../global_mmlu_full_ny_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ny_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ny_college_biology.yaml | 5 ++ ...global_mmlu_full_ny_college_chemistry.yaml | 5 ++ ...mmlu_full_ny_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ny_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ny_college_medicine.yaml | 5 ++ .../global_mmlu_full_ny_college_physics.yaml | 5 ++ ...global_mmlu_full_ny_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ny_conceptual_physics.yaml | 5 ++ .../ny/global_mmlu_full_ny_econometrics.yaml | 5 ++ ...l_mmlu_full_ny_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ny_elementary_mathematics.yaml | 5 ++ .../ny/global_mmlu_full_ny_formal_logic.yaml | 5 ++ .../ny/global_mmlu_full_ny_global_facts.yaml | 5 ++ ...obal_mmlu_full_ny_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ny_high_school_chemistry.yaml | 5 ++ ..._full_ny_high_school_computer_science.yaml | 5 ++ 
..._full_ny_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ny_high_school_geography.yaml | 5 ++ ...y_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ny_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ny_high_school_mathematics.yaml | 5 ++ ...lu_full_ny_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ny_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ny_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ny_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ny_high_school_us_history.yaml | 5 ++ ...mlu_full_ny_high_school_world_history.yaml | 5 ++ .../ny/global_mmlu_full_ny_human_aging.yaml | 5 ++ .../global_mmlu_full_ny_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ny_international_law.yaml | 5 ++ .../ny/global_mmlu_full_ny_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ny_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ny_machine_learning.yaml | 5 ++ .../ny/global_mmlu_full_ny_management.yaml | 5 ++ .../ny/global_mmlu_full_ny_marketing.yaml | 5 ++ .../global_mmlu_full_ny_medical_genetics.yaml | 5 ++ .../ny/global_mmlu_full_ny_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ny_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ny_moral_scenarios.yaml | 5 ++ .../ny/global_mmlu_full_ny_nutrition.yaml | 5 ++ .../ny/global_mmlu_full_ny_philosophy.yaml | 5 ++ .../ny/global_mmlu_full_ny_prehistory.yaml | 5 ++ ..._mmlu_full_ny_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ny_professional_law.yaml | 5 ++ ...al_mmlu_full_ny_professional_medicine.yaml | 5 ++ ..._mmlu_full_ny_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ny_public_relations.yaml | 5 ++ .../global_mmlu_full_ny_security_studies.yaml | 5 ++ .../ny/global_mmlu_full_ny_sociology.yaml | 5 ++ ...global_mmlu_full_ny_us_foreign_policy.yaml | 5 ++ .../full/ny/global_mmlu_full_ny_virology.yaml | 5 ++ .../global_mmlu_full_ny_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ny/utils.py | 73 +++++++++++++++++++ .../full/pl/_global_mmlu_full_pl.yaml | 11 +++ 
.../pl/_global_mmlu_full_pl_humanities.yaml | 8 ++ .../full/pl/_global_mmlu_full_pl_other.yaml | 8 ++ .../_global_mmlu_full_pl_social_sciences.yaml | 8 ++ .../full/pl/_global_mmlu_full_pl_stem.yaml | 8 ++ .../global_mmlu/full/pl/_pl_template_yaml | 16 ++++ .../global_mmlu_full_pl_abstract_algebra.yaml | 5 ++ .../full/pl/global_mmlu_full_pl_anatomy.yaml | 5 ++ .../pl/global_mmlu_full_pl_astronomy.yaml | 5 ++ .../global_mmlu_full_pl_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_pl_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_pl_college_biology.yaml | 5 ++ ...global_mmlu_full_pl_college_chemistry.yaml | 5 ++ ...mmlu_full_pl_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_pl_college_mathematics.yaml | 5 ++ .../global_mmlu_full_pl_college_medicine.yaml | 5 ++ .../global_mmlu_full_pl_college_physics.yaml | 5 ++ ...global_mmlu_full_pl_computer_security.yaml | 5 ++ ...lobal_mmlu_full_pl_conceptual_physics.yaml | 5 ++ .../pl/global_mmlu_full_pl_econometrics.yaml | 5 ++ ...l_mmlu_full_pl_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_pl_elementary_mathematics.yaml | 5 ++ .../pl/global_mmlu_full_pl_formal_logic.yaml | 5 ++ .../pl/global_mmlu_full_pl_global_facts.yaml | 5 ++ ...obal_mmlu_full_pl_high_school_biology.yaml | 5 ++ ...al_mmlu_full_pl_high_school_chemistry.yaml | 5 ++ ..._full_pl_high_school_computer_science.yaml | 5 ++ ..._full_pl_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_pl_high_school_geography.yaml | 5 ++ ...l_high_school_government_and_politics.yaml | 5 ++ ...lu_full_pl_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_pl_high_school_mathematics.yaml | 5 ++ ...lu_full_pl_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_pl_high_school_physics.yaml | 5 ++ ...l_mmlu_full_pl_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_pl_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_pl_high_school_us_history.yaml | 5 ++ ...mlu_full_pl_high_school_world_history.yaml | 5 ++ .../pl/global_mmlu_full_pl_human_aging.yaml | 5 ++ 
.../global_mmlu_full_pl_human_sexuality.yaml | 5 ++ ...global_mmlu_full_pl_international_law.yaml | 5 ++ .../pl/global_mmlu_full_pl_jurisprudence.yaml | 5 ++ ...global_mmlu_full_pl_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_pl_machine_learning.yaml | 5 ++ .../pl/global_mmlu_full_pl_management.yaml | 5 ++ .../pl/global_mmlu_full_pl_marketing.yaml | 5 ++ .../global_mmlu_full_pl_medical_genetics.yaml | 5 ++ .../pl/global_mmlu_full_pl_miscellaneous.yaml | 5 ++ .../global_mmlu_full_pl_moral_disputes.yaml | 5 ++ .../global_mmlu_full_pl_moral_scenarios.yaml | 5 ++ .../pl/global_mmlu_full_pl_nutrition.yaml | 5 ++ .../pl/global_mmlu_full_pl_philosophy.yaml | 5 ++ .../pl/global_mmlu_full_pl_prehistory.yaml | 5 ++ ..._mmlu_full_pl_professional_accounting.yaml | 5 ++ .../global_mmlu_full_pl_professional_law.yaml | 5 ++ ...al_mmlu_full_pl_professional_medicine.yaml | 5 ++ ..._mmlu_full_pl_professional_psychology.yaml | 5 ++ .../global_mmlu_full_pl_public_relations.yaml | 5 ++ .../global_mmlu_full_pl_security_studies.yaml | 5 ++ .../pl/global_mmlu_full_pl_sociology.yaml | 5 ++ ...global_mmlu_full_pl_us_foreign_policy.yaml | 5 ++ .../full/pl/global_mmlu_full_pl_virology.yaml | 5 ++ .../global_mmlu_full_pl_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/pl/utils.py | 73 +++++++++++++++++++ .../full/pt/_global_mmlu_full_pt.yaml | 11 +++ .../pt/_global_mmlu_full_pt_humanities.yaml | 8 ++ .../full/pt/_global_mmlu_full_pt_other.yaml | 8 ++ .../_global_mmlu_full_pt_social_sciences.yaml | 8 ++ .../full/pt/_global_mmlu_full_pt_stem.yaml | 8 ++ .../global_mmlu/full/pt/_pt_template_yaml | 16 ++++ .../global_mmlu_full_pt_abstract_algebra.yaml | 5 ++ .../full/pt/global_mmlu_full_pt_anatomy.yaml | 5 ++ .../pt/global_mmlu_full_pt_astronomy.yaml | 5 ++ .../global_mmlu_full_pt_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_pt_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_pt_college_biology.yaml | 5 ++ ...global_mmlu_full_pt_college_chemistry.yaml | 5 ++ 
...mmlu_full_pt_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_pt_college_mathematics.yaml | 5 ++ .../global_mmlu_full_pt_college_medicine.yaml | 5 ++ .../global_mmlu_full_pt_college_physics.yaml | 5 ++ ...global_mmlu_full_pt_computer_security.yaml | 5 ++ ...lobal_mmlu_full_pt_conceptual_physics.yaml | 5 ++ .../pt/global_mmlu_full_pt_econometrics.yaml | 5 ++ ...l_mmlu_full_pt_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_pt_elementary_mathematics.yaml | 5 ++ .../pt/global_mmlu_full_pt_formal_logic.yaml | 5 ++ .../pt/global_mmlu_full_pt_global_facts.yaml | 5 ++ ...obal_mmlu_full_pt_high_school_biology.yaml | 5 ++ ...al_mmlu_full_pt_high_school_chemistry.yaml | 5 ++ ..._full_pt_high_school_computer_science.yaml | 5 ++ ..._full_pt_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_pt_high_school_geography.yaml | 5 ++ ...t_high_school_government_and_politics.yaml | 5 ++ ...lu_full_pt_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_pt_high_school_mathematics.yaml | 5 ++ ...lu_full_pt_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_pt_high_school_physics.yaml | 5 ++ ...l_mmlu_full_pt_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_pt_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_pt_high_school_us_history.yaml | 5 ++ ...mlu_full_pt_high_school_world_history.yaml | 5 ++ .../pt/global_mmlu_full_pt_human_aging.yaml | 5 ++ .../global_mmlu_full_pt_human_sexuality.yaml | 5 ++ ...global_mmlu_full_pt_international_law.yaml | 5 ++ .../pt/global_mmlu_full_pt_jurisprudence.yaml | 5 ++ ...global_mmlu_full_pt_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_pt_machine_learning.yaml | 5 ++ .../pt/global_mmlu_full_pt_management.yaml | 5 ++ .../pt/global_mmlu_full_pt_marketing.yaml | 5 ++ .../global_mmlu_full_pt_medical_genetics.yaml | 5 ++ .../pt/global_mmlu_full_pt_miscellaneous.yaml | 5 ++ .../global_mmlu_full_pt_moral_disputes.yaml | 5 ++ .../global_mmlu_full_pt_moral_scenarios.yaml | 5 ++ .../pt/global_mmlu_full_pt_nutrition.yaml | 5 ++ 
.../pt/global_mmlu_full_pt_philosophy.yaml | 5 ++ .../pt/global_mmlu_full_pt_prehistory.yaml | 5 ++ ..._mmlu_full_pt_professional_accounting.yaml | 5 ++ .../global_mmlu_full_pt_professional_law.yaml | 5 ++ ...al_mmlu_full_pt_professional_medicine.yaml | 5 ++ ..._mmlu_full_pt_professional_psychology.yaml | 5 ++ .../global_mmlu_full_pt_public_relations.yaml | 5 ++ .../global_mmlu_full_pt_security_studies.yaml | 5 ++ .../pt/global_mmlu_full_pt_sociology.yaml | 5 ++ ...global_mmlu_full_pt_us_foreign_policy.yaml | 5 ++ .../full/pt/global_mmlu_full_pt_virology.yaml | 5 ++ .../global_mmlu_full_pt_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/pt/utils.py | 73 +++++++++++++++++++ .../full/ro/_global_mmlu_full_ro.yaml | 11 +++ .../ro/_global_mmlu_full_ro_humanities.yaml | 8 ++ .../full/ro/_global_mmlu_full_ro_other.yaml | 8 ++ .../_global_mmlu_full_ro_social_sciences.yaml | 8 ++ .../full/ro/_global_mmlu_full_ro_stem.yaml | 8 ++ .../global_mmlu/full/ro/_ro_template_yaml | 16 ++++ .../global_mmlu_full_ro_abstract_algebra.yaml | 5 ++ .../full/ro/global_mmlu_full_ro_anatomy.yaml | 5 ++ .../ro/global_mmlu_full_ro_astronomy.yaml | 5 ++ .../global_mmlu_full_ro_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ro_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ro_college_biology.yaml | 5 ++ ...global_mmlu_full_ro_college_chemistry.yaml | 5 ++ ...mmlu_full_ro_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ro_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ro_college_medicine.yaml | 5 ++ .../global_mmlu_full_ro_college_physics.yaml | 5 ++ ...global_mmlu_full_ro_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ro_conceptual_physics.yaml | 5 ++ .../ro/global_mmlu_full_ro_econometrics.yaml | 5 ++ ...l_mmlu_full_ro_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ro_elementary_mathematics.yaml | 5 ++ .../ro/global_mmlu_full_ro_formal_logic.yaml | 5 ++ .../ro/global_mmlu_full_ro_global_facts.yaml | 5 ++ ...obal_mmlu_full_ro_high_school_biology.yaml | 5 ++ 
...al_mmlu_full_ro_high_school_chemistry.yaml | 5 ++ ..._full_ro_high_school_computer_science.yaml | 5 ++ ..._full_ro_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ro_high_school_geography.yaml | 5 ++ ...o_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ro_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ro_high_school_mathematics.yaml | 5 ++ ...lu_full_ro_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ro_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ro_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ro_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ro_high_school_us_history.yaml | 5 ++ ...mlu_full_ro_high_school_world_history.yaml | 5 ++ .../ro/global_mmlu_full_ro_human_aging.yaml | 5 ++ .../global_mmlu_full_ro_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ro_international_law.yaml | 5 ++ .../ro/global_mmlu_full_ro_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ro_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ro_machine_learning.yaml | 5 ++ .../ro/global_mmlu_full_ro_management.yaml | 5 ++ .../ro/global_mmlu_full_ro_marketing.yaml | 5 ++ .../global_mmlu_full_ro_medical_genetics.yaml | 5 ++ .../ro/global_mmlu_full_ro_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ro_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ro_moral_scenarios.yaml | 5 ++ .../ro/global_mmlu_full_ro_nutrition.yaml | 5 ++ .../ro/global_mmlu_full_ro_philosophy.yaml | 5 ++ .../ro/global_mmlu_full_ro_prehistory.yaml | 5 ++ ..._mmlu_full_ro_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ro_professional_law.yaml | 5 ++ ...al_mmlu_full_ro_professional_medicine.yaml | 5 ++ ..._mmlu_full_ro_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ro_public_relations.yaml | 5 ++ .../global_mmlu_full_ro_security_studies.yaml | 5 ++ .../ro/global_mmlu_full_ro_sociology.yaml | 5 ++ ...global_mmlu_full_ro_us_foreign_policy.yaml | 5 ++ .../full/ro/global_mmlu_full_ro_virology.yaml | 5 ++ .../global_mmlu_full_ro_world_religions.yaml | 5 ++ 
lm_eval/tasks/global_mmlu/full/ro/utils.py | 73 +++++++++++++++++++ .../full/ru/_global_mmlu_full_ru.yaml | 11 +++ .../ru/_global_mmlu_full_ru_humanities.yaml | 8 ++ .../full/ru/_global_mmlu_full_ru_other.yaml | 8 ++ .../_global_mmlu_full_ru_social_sciences.yaml | 8 ++ .../full/ru/_global_mmlu_full_ru_stem.yaml | 8 ++ .../global_mmlu/full/ru/_ru_template_yaml | 16 ++++ .../global_mmlu_full_ru_abstract_algebra.yaml | 5 ++ .../full/ru/global_mmlu_full_ru_anatomy.yaml | 5 ++ .../ru/global_mmlu_full_ru_astronomy.yaml | 5 ++ .../global_mmlu_full_ru_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ru_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ru_college_biology.yaml | 5 ++ ...global_mmlu_full_ru_college_chemistry.yaml | 5 ++ ...mmlu_full_ru_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ru_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ru_college_medicine.yaml | 5 ++ .../global_mmlu_full_ru_college_physics.yaml | 5 ++ ...global_mmlu_full_ru_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ru_conceptual_physics.yaml | 5 ++ .../ru/global_mmlu_full_ru_econometrics.yaml | 5 ++ ...l_mmlu_full_ru_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ru_elementary_mathematics.yaml | 5 ++ .../ru/global_mmlu_full_ru_formal_logic.yaml | 5 ++ .../ru/global_mmlu_full_ru_global_facts.yaml | 5 ++ ...obal_mmlu_full_ru_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ru_high_school_chemistry.yaml | 5 ++ ..._full_ru_high_school_computer_science.yaml | 5 ++ ..._full_ru_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ru_high_school_geography.yaml | 5 ++ ...u_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ru_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ru_high_school_mathematics.yaml | 5 ++ ...lu_full_ru_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ru_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ru_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ru_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ru_high_school_us_history.yaml | 
5 ++ ...mlu_full_ru_high_school_world_history.yaml | 5 ++ .../ru/global_mmlu_full_ru_human_aging.yaml | 5 ++ .../global_mmlu_full_ru_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ru_international_law.yaml | 5 ++ .../ru/global_mmlu_full_ru_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ru_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ru_machine_learning.yaml | 5 ++ .../ru/global_mmlu_full_ru_management.yaml | 5 ++ .../ru/global_mmlu_full_ru_marketing.yaml | 5 ++ .../global_mmlu_full_ru_medical_genetics.yaml | 5 ++ .../ru/global_mmlu_full_ru_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ru_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ru_moral_scenarios.yaml | 5 ++ .../ru/global_mmlu_full_ru_nutrition.yaml | 5 ++ .../ru/global_mmlu_full_ru_philosophy.yaml | 5 ++ .../ru/global_mmlu_full_ru_prehistory.yaml | 5 ++ ..._mmlu_full_ru_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ru_professional_law.yaml | 5 ++ ...al_mmlu_full_ru_professional_medicine.yaml | 5 ++ ..._mmlu_full_ru_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ru_public_relations.yaml | 5 ++ .../global_mmlu_full_ru_security_studies.yaml | 5 ++ .../ru/global_mmlu_full_ru_sociology.yaml | 5 ++ ...global_mmlu_full_ru_us_foreign_policy.yaml | 5 ++ .../full/ru/global_mmlu_full_ru_virology.yaml | 5 ++ .../global_mmlu_full_ru_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ru/utils.py | 73 +++++++++++++++++++ .../full/si/_global_mmlu_full_si.yaml | 11 +++ .../si/_global_mmlu_full_si_humanities.yaml | 8 ++ .../full/si/_global_mmlu_full_si_other.yaml | 8 ++ .../_global_mmlu_full_si_social_sciences.yaml | 8 ++ .../full/si/_global_mmlu_full_si_stem.yaml | 8 ++ .../global_mmlu/full/si/_si_template_yaml | 16 ++++ .../global_mmlu_full_si_abstract_algebra.yaml | 5 ++ .../full/si/global_mmlu_full_si_anatomy.yaml | 5 ++ .../si/global_mmlu_full_si_astronomy.yaml | 5 ++ .../global_mmlu_full_si_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_si_clinical_knowledge.yaml | 5 ++ 
.../global_mmlu_full_si_college_biology.yaml | 5 ++ ...global_mmlu_full_si_college_chemistry.yaml | 5 ++ ...mmlu_full_si_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_si_college_mathematics.yaml | 5 ++ .../global_mmlu_full_si_college_medicine.yaml | 5 ++ .../global_mmlu_full_si_college_physics.yaml | 5 ++ ...global_mmlu_full_si_computer_security.yaml | 5 ++ ...lobal_mmlu_full_si_conceptual_physics.yaml | 5 ++ .../si/global_mmlu_full_si_econometrics.yaml | 5 ++ ...l_mmlu_full_si_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_si_elementary_mathematics.yaml | 5 ++ .../si/global_mmlu_full_si_formal_logic.yaml | 5 ++ .../si/global_mmlu_full_si_global_facts.yaml | 5 ++ ...obal_mmlu_full_si_high_school_biology.yaml | 5 ++ ...al_mmlu_full_si_high_school_chemistry.yaml | 5 ++ ..._full_si_high_school_computer_science.yaml | 5 ++ ..._full_si_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_si_high_school_geography.yaml | 5 ++ ...i_high_school_government_and_politics.yaml | 5 ++ ...lu_full_si_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_si_high_school_mathematics.yaml | 5 ++ ...lu_full_si_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_si_high_school_physics.yaml | 5 ++ ...l_mmlu_full_si_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_si_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_si_high_school_us_history.yaml | 5 ++ ...mlu_full_si_high_school_world_history.yaml | 5 ++ .../si/global_mmlu_full_si_human_aging.yaml | 5 ++ .../global_mmlu_full_si_human_sexuality.yaml | 5 ++ ...global_mmlu_full_si_international_law.yaml | 5 ++ .../si/global_mmlu_full_si_jurisprudence.yaml | 5 ++ ...global_mmlu_full_si_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_si_machine_learning.yaml | 5 ++ .../si/global_mmlu_full_si_management.yaml | 5 ++ .../si/global_mmlu_full_si_marketing.yaml | 5 ++ .../global_mmlu_full_si_medical_genetics.yaml | 5 ++ .../si/global_mmlu_full_si_miscellaneous.yaml | 5 ++ .../global_mmlu_full_si_moral_disputes.yaml | 5 ++ 
.../global_mmlu_full_si_moral_scenarios.yaml | 5 ++ .../si/global_mmlu_full_si_nutrition.yaml | 5 ++ .../si/global_mmlu_full_si_philosophy.yaml | 5 ++ .../si/global_mmlu_full_si_prehistory.yaml | 5 ++ ..._mmlu_full_si_professional_accounting.yaml | 5 ++ .../global_mmlu_full_si_professional_law.yaml | 5 ++ ...al_mmlu_full_si_professional_medicine.yaml | 5 ++ ..._mmlu_full_si_professional_psychology.yaml | 5 ++ .../global_mmlu_full_si_public_relations.yaml | 5 ++ .../global_mmlu_full_si_security_studies.yaml | 5 ++ .../si/global_mmlu_full_si_sociology.yaml | 5 ++ ...global_mmlu_full_si_us_foreign_policy.yaml | 5 ++ .../full/si/global_mmlu_full_si_virology.yaml | 5 ++ .../global_mmlu_full_si_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/si/utils.py | 73 +++++++++++++++++++ .../full/sn/_global_mmlu_full_sn.yaml | 11 +++ .../sn/_global_mmlu_full_sn_humanities.yaml | 8 ++ .../full/sn/_global_mmlu_full_sn_other.yaml | 8 ++ .../_global_mmlu_full_sn_social_sciences.yaml | 8 ++ .../full/sn/_global_mmlu_full_sn_stem.yaml | 8 ++ .../global_mmlu/full/sn/_sn_template_yaml | 16 ++++ .../global_mmlu_full_sn_abstract_algebra.yaml | 5 ++ .../full/sn/global_mmlu_full_sn_anatomy.yaml | 5 ++ .../sn/global_mmlu_full_sn_astronomy.yaml | 5 ++ .../global_mmlu_full_sn_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_sn_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_sn_college_biology.yaml | 5 ++ ...global_mmlu_full_sn_college_chemistry.yaml | 5 ++ ...mmlu_full_sn_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_sn_college_mathematics.yaml | 5 ++ .../global_mmlu_full_sn_college_medicine.yaml | 5 ++ .../global_mmlu_full_sn_college_physics.yaml | 5 ++ ...global_mmlu_full_sn_computer_security.yaml | 5 ++ ...lobal_mmlu_full_sn_conceptual_physics.yaml | 5 ++ .../sn/global_mmlu_full_sn_econometrics.yaml | 5 ++ ...l_mmlu_full_sn_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_sn_elementary_mathematics.yaml | 5 ++ .../sn/global_mmlu_full_sn_formal_logic.yaml | 5 ++ 
.../sn/global_mmlu_full_sn_global_facts.yaml | 5 ++ ...obal_mmlu_full_sn_high_school_biology.yaml | 5 ++ ...al_mmlu_full_sn_high_school_chemistry.yaml | 5 ++ ..._full_sn_high_school_computer_science.yaml | 5 ++ ..._full_sn_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_sn_high_school_geography.yaml | 5 ++ ...n_high_school_government_and_politics.yaml | 5 ++ ...lu_full_sn_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_sn_high_school_mathematics.yaml | 5 ++ ...lu_full_sn_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_sn_high_school_physics.yaml | 5 ++ ...l_mmlu_full_sn_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_sn_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_sn_high_school_us_history.yaml | 5 ++ ...mlu_full_sn_high_school_world_history.yaml | 5 ++ .../sn/global_mmlu_full_sn_human_aging.yaml | 5 ++ .../global_mmlu_full_sn_human_sexuality.yaml | 5 ++ ...global_mmlu_full_sn_international_law.yaml | 5 ++ .../sn/global_mmlu_full_sn_jurisprudence.yaml | 5 ++ ...global_mmlu_full_sn_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_sn_machine_learning.yaml | 5 ++ .../sn/global_mmlu_full_sn_management.yaml | 5 ++ .../sn/global_mmlu_full_sn_marketing.yaml | 5 ++ .../global_mmlu_full_sn_medical_genetics.yaml | 5 ++ .../sn/global_mmlu_full_sn_miscellaneous.yaml | 5 ++ .../global_mmlu_full_sn_moral_disputes.yaml | 5 ++ .../global_mmlu_full_sn_moral_scenarios.yaml | 5 ++ .../sn/global_mmlu_full_sn_nutrition.yaml | 5 ++ .../sn/global_mmlu_full_sn_philosophy.yaml | 5 ++ .../sn/global_mmlu_full_sn_prehistory.yaml | 5 ++ ..._mmlu_full_sn_professional_accounting.yaml | 5 ++ .../global_mmlu_full_sn_professional_law.yaml | 5 ++ ...al_mmlu_full_sn_professional_medicine.yaml | 5 ++ ..._mmlu_full_sn_professional_psychology.yaml | 5 ++ .../global_mmlu_full_sn_public_relations.yaml | 5 ++ .../global_mmlu_full_sn_security_studies.yaml | 5 ++ .../sn/global_mmlu_full_sn_sociology.yaml | 5 ++ ...global_mmlu_full_sn_us_foreign_policy.yaml | 5 ++ 
.../full/sn/global_mmlu_full_sn_virology.yaml | 5 ++ .../global_mmlu_full_sn_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/sn/utils.py | 73 +++++++++++++++++++ .../full/so/_global_mmlu_full_so.yaml | 11 +++ .../so/_global_mmlu_full_so_humanities.yaml | 8 ++ .../full/so/_global_mmlu_full_so_other.yaml | 8 ++ .../_global_mmlu_full_so_social_sciences.yaml | 8 ++ .../full/so/_global_mmlu_full_so_stem.yaml | 8 ++ .../global_mmlu/full/so/_so_template_yaml | 16 ++++ .../global_mmlu_full_so_abstract_algebra.yaml | 5 ++ .../full/so/global_mmlu_full_so_anatomy.yaml | 5 ++ .../so/global_mmlu_full_so_astronomy.yaml | 5 ++ .../global_mmlu_full_so_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_so_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_so_college_biology.yaml | 5 ++ ...global_mmlu_full_so_college_chemistry.yaml | 5 ++ ...mmlu_full_so_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_so_college_mathematics.yaml | 5 ++ .../global_mmlu_full_so_college_medicine.yaml | 5 ++ .../global_mmlu_full_so_college_physics.yaml | 5 ++ ...global_mmlu_full_so_computer_security.yaml | 5 ++ ...lobal_mmlu_full_so_conceptual_physics.yaml | 5 ++ .../so/global_mmlu_full_so_econometrics.yaml | 5 ++ ...l_mmlu_full_so_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_so_elementary_mathematics.yaml | 5 ++ .../so/global_mmlu_full_so_formal_logic.yaml | 5 ++ .../so/global_mmlu_full_so_global_facts.yaml | 5 ++ ...obal_mmlu_full_so_high_school_biology.yaml | 5 ++ ...al_mmlu_full_so_high_school_chemistry.yaml | 5 ++ ..._full_so_high_school_computer_science.yaml | 5 ++ ..._full_so_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_so_high_school_geography.yaml | 5 ++ ...o_high_school_government_and_politics.yaml | 5 ++ ...lu_full_so_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_so_high_school_mathematics.yaml | 5 ++ ...lu_full_so_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_so_high_school_physics.yaml | 5 ++ ...l_mmlu_full_so_high_school_psychology.yaml | 5 
++ ...l_mmlu_full_so_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_so_high_school_us_history.yaml | 5 ++ ...mlu_full_so_high_school_world_history.yaml | 5 ++ .../so/global_mmlu_full_so_human_aging.yaml | 5 ++ .../global_mmlu_full_so_human_sexuality.yaml | 5 ++ ...global_mmlu_full_so_international_law.yaml | 5 ++ .../so/global_mmlu_full_so_jurisprudence.yaml | 5 ++ ...global_mmlu_full_so_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_so_machine_learning.yaml | 5 ++ .../so/global_mmlu_full_so_management.yaml | 5 ++ .../so/global_mmlu_full_so_marketing.yaml | 5 ++ .../global_mmlu_full_so_medical_genetics.yaml | 5 ++ .../so/global_mmlu_full_so_miscellaneous.yaml | 5 ++ .../global_mmlu_full_so_moral_disputes.yaml | 5 ++ .../global_mmlu_full_so_moral_scenarios.yaml | 5 ++ .../so/global_mmlu_full_so_nutrition.yaml | 5 ++ .../so/global_mmlu_full_so_philosophy.yaml | 5 ++ .../so/global_mmlu_full_so_prehistory.yaml | 5 ++ ..._mmlu_full_so_professional_accounting.yaml | 5 ++ .../global_mmlu_full_so_professional_law.yaml | 5 ++ ...al_mmlu_full_so_professional_medicine.yaml | 5 ++ ..._mmlu_full_so_professional_psychology.yaml | 5 ++ .../global_mmlu_full_so_public_relations.yaml | 5 ++ .../global_mmlu_full_so_security_studies.yaml | 5 ++ .../so/global_mmlu_full_so_sociology.yaml | 5 ++ ...global_mmlu_full_so_us_foreign_policy.yaml | 5 ++ .../full/so/global_mmlu_full_so_virology.yaml | 5 ++ .../global_mmlu_full_so_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/so/utils.py | 73 +++++++++++++++++++ .../full/sr/_global_mmlu_full_sr.yaml | 11 +++ .../sr/_global_mmlu_full_sr_humanities.yaml | 8 ++ .../full/sr/_global_mmlu_full_sr_other.yaml | 8 ++ .../_global_mmlu_full_sr_social_sciences.yaml | 8 ++ .../full/sr/_global_mmlu_full_sr_stem.yaml | 8 ++ .../global_mmlu/full/sr/_sr_template_yaml | 16 ++++ .../global_mmlu_full_sr_abstract_algebra.yaml | 5 ++ .../full/sr/global_mmlu_full_sr_anatomy.yaml | 5 ++ .../sr/global_mmlu_full_sr_astronomy.yaml | 5 ++ 
.../global_mmlu_full_sr_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_sr_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_sr_college_biology.yaml | 5 ++ ...global_mmlu_full_sr_college_chemistry.yaml | 5 ++ ...mmlu_full_sr_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_sr_college_mathematics.yaml | 5 ++ .../global_mmlu_full_sr_college_medicine.yaml | 5 ++ .../global_mmlu_full_sr_college_physics.yaml | 5 ++ ...global_mmlu_full_sr_computer_security.yaml | 5 ++ ...lobal_mmlu_full_sr_conceptual_physics.yaml | 5 ++ .../sr/global_mmlu_full_sr_econometrics.yaml | 5 ++ ...l_mmlu_full_sr_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_sr_elementary_mathematics.yaml | 5 ++ .../sr/global_mmlu_full_sr_formal_logic.yaml | 5 ++ .../sr/global_mmlu_full_sr_global_facts.yaml | 5 ++ ...obal_mmlu_full_sr_high_school_biology.yaml | 5 ++ ...al_mmlu_full_sr_high_school_chemistry.yaml | 5 ++ ..._full_sr_high_school_computer_science.yaml | 5 ++ ..._full_sr_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_sr_high_school_geography.yaml | 5 ++ ...r_high_school_government_and_politics.yaml | 5 ++ ...lu_full_sr_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_sr_high_school_mathematics.yaml | 5 ++ ...lu_full_sr_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_sr_high_school_physics.yaml | 5 ++ ...l_mmlu_full_sr_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_sr_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_sr_high_school_us_history.yaml | 5 ++ ...mlu_full_sr_high_school_world_history.yaml | 5 ++ .../sr/global_mmlu_full_sr_human_aging.yaml | 5 ++ .../global_mmlu_full_sr_human_sexuality.yaml | 5 ++ ...global_mmlu_full_sr_international_law.yaml | 5 ++ .../sr/global_mmlu_full_sr_jurisprudence.yaml | 5 ++ ...global_mmlu_full_sr_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_sr_machine_learning.yaml | 5 ++ .../sr/global_mmlu_full_sr_management.yaml | 5 ++ .../sr/global_mmlu_full_sr_marketing.yaml | 5 ++ .../global_mmlu_full_sr_medical_genetics.yaml | 5 ++ 
.../sr/global_mmlu_full_sr_miscellaneous.yaml | 5 ++ .../global_mmlu_full_sr_moral_disputes.yaml | 5 ++ .../global_mmlu_full_sr_moral_scenarios.yaml | 5 ++ .../sr/global_mmlu_full_sr_nutrition.yaml | 5 ++ .../sr/global_mmlu_full_sr_philosophy.yaml | 5 ++ .../sr/global_mmlu_full_sr_prehistory.yaml | 5 ++ ..._mmlu_full_sr_professional_accounting.yaml | 5 ++ .../global_mmlu_full_sr_professional_law.yaml | 5 ++ ...al_mmlu_full_sr_professional_medicine.yaml | 5 ++ ..._mmlu_full_sr_professional_psychology.yaml | 5 ++ .../global_mmlu_full_sr_public_relations.yaml | 5 ++ .../global_mmlu_full_sr_security_studies.yaml | 5 ++ .../sr/global_mmlu_full_sr_sociology.yaml | 5 ++ ...global_mmlu_full_sr_us_foreign_policy.yaml | 5 ++ .../full/sr/global_mmlu_full_sr_virology.yaml | 5 ++ .../global_mmlu_full_sr_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/sr/utils.py | 73 +++++++++++++++++++ .../full/sv/_global_mmlu_full_sv.yaml | 11 +++ .../sv/_global_mmlu_full_sv_humanities.yaml | 8 ++ .../full/sv/_global_mmlu_full_sv_other.yaml | 8 ++ .../_global_mmlu_full_sv_social_sciences.yaml | 8 ++ .../full/sv/_global_mmlu_full_sv_stem.yaml | 8 ++ .../global_mmlu/full/sv/_sv_template_yaml | 16 ++++ .../global_mmlu_full_sv_abstract_algebra.yaml | 5 ++ .../full/sv/global_mmlu_full_sv_anatomy.yaml | 5 ++ .../sv/global_mmlu_full_sv_astronomy.yaml | 5 ++ .../global_mmlu_full_sv_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_sv_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_sv_college_biology.yaml | 5 ++ ...global_mmlu_full_sv_college_chemistry.yaml | 5 ++ ...mmlu_full_sv_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_sv_college_mathematics.yaml | 5 ++ .../global_mmlu_full_sv_college_medicine.yaml | 5 ++ .../global_mmlu_full_sv_college_physics.yaml | 5 ++ ...global_mmlu_full_sv_computer_security.yaml | 5 ++ ...lobal_mmlu_full_sv_conceptual_physics.yaml | 5 ++ .../sv/global_mmlu_full_sv_econometrics.yaml | 5 ++ ...l_mmlu_full_sv_electrical_engineering.yaml | 5 ++ 
...l_mmlu_full_sv_elementary_mathematics.yaml | 5 ++ .../sv/global_mmlu_full_sv_formal_logic.yaml | 5 ++ .../sv/global_mmlu_full_sv_global_facts.yaml | 5 ++ ...obal_mmlu_full_sv_high_school_biology.yaml | 5 ++ ...al_mmlu_full_sv_high_school_chemistry.yaml | 5 ++ ..._full_sv_high_school_computer_science.yaml | 5 ++ ..._full_sv_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_sv_high_school_geography.yaml | 5 ++ ...v_high_school_government_and_politics.yaml | 5 ++ ...lu_full_sv_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_sv_high_school_mathematics.yaml | 5 ++ ...lu_full_sv_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_sv_high_school_physics.yaml | 5 ++ ...l_mmlu_full_sv_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_sv_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_sv_high_school_us_history.yaml | 5 ++ ...mlu_full_sv_high_school_world_history.yaml | 5 ++ .../sv/global_mmlu_full_sv_human_aging.yaml | 5 ++ .../global_mmlu_full_sv_human_sexuality.yaml | 5 ++ ...global_mmlu_full_sv_international_law.yaml | 5 ++ .../sv/global_mmlu_full_sv_jurisprudence.yaml | 5 ++ ...global_mmlu_full_sv_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_sv_machine_learning.yaml | 5 ++ .../sv/global_mmlu_full_sv_management.yaml | 5 ++ .../sv/global_mmlu_full_sv_marketing.yaml | 5 ++ .../global_mmlu_full_sv_medical_genetics.yaml | 5 ++ .../sv/global_mmlu_full_sv_miscellaneous.yaml | 5 ++ .../global_mmlu_full_sv_moral_disputes.yaml | 5 ++ .../global_mmlu_full_sv_moral_scenarios.yaml | 5 ++ .../sv/global_mmlu_full_sv_nutrition.yaml | 5 ++ .../sv/global_mmlu_full_sv_philosophy.yaml | 5 ++ .../sv/global_mmlu_full_sv_prehistory.yaml | 5 ++ ..._mmlu_full_sv_professional_accounting.yaml | 5 ++ .../global_mmlu_full_sv_professional_law.yaml | 5 ++ ...al_mmlu_full_sv_professional_medicine.yaml | 5 ++ ..._mmlu_full_sv_professional_psychology.yaml | 5 ++ .../global_mmlu_full_sv_public_relations.yaml | 5 ++ .../global_mmlu_full_sv_security_studies.yaml | 5 ++ 
.../sv/global_mmlu_full_sv_sociology.yaml | 5 ++ ...global_mmlu_full_sv_us_foreign_policy.yaml | 5 ++ .../full/sv/global_mmlu_full_sv_virology.yaml | 5 ++ .../global_mmlu_full_sv_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/sv/utils.py | 73 +++++++++++++++++++ .../full/sw/_global_mmlu_full_sw.yaml | 11 +++ .../sw/_global_mmlu_full_sw_humanities.yaml | 8 ++ .../full/sw/_global_mmlu_full_sw_other.yaml | 8 ++ .../_global_mmlu_full_sw_social_sciences.yaml | 8 ++ .../full/sw/_global_mmlu_full_sw_stem.yaml | 8 ++ .../global_mmlu/full/sw/_sw_template_yaml | 16 ++++ .../global_mmlu_full_sw_abstract_algebra.yaml | 5 ++ .../full/sw/global_mmlu_full_sw_anatomy.yaml | 5 ++ .../sw/global_mmlu_full_sw_astronomy.yaml | 5 ++ .../global_mmlu_full_sw_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_sw_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_sw_college_biology.yaml | 5 ++ ...global_mmlu_full_sw_college_chemistry.yaml | 5 ++ ...mmlu_full_sw_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_sw_college_mathematics.yaml | 5 ++ .../global_mmlu_full_sw_college_medicine.yaml | 5 ++ .../global_mmlu_full_sw_college_physics.yaml | 5 ++ ...global_mmlu_full_sw_computer_security.yaml | 5 ++ ...lobal_mmlu_full_sw_conceptual_physics.yaml | 5 ++ .../sw/global_mmlu_full_sw_econometrics.yaml | 5 ++ ...l_mmlu_full_sw_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_sw_elementary_mathematics.yaml | 5 ++ .../sw/global_mmlu_full_sw_formal_logic.yaml | 5 ++ .../sw/global_mmlu_full_sw_global_facts.yaml | 5 ++ ...obal_mmlu_full_sw_high_school_biology.yaml | 5 ++ ...al_mmlu_full_sw_high_school_chemistry.yaml | 5 ++ ..._full_sw_high_school_computer_science.yaml | 5 ++ ..._full_sw_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_sw_high_school_geography.yaml | 5 ++ ...w_high_school_government_and_politics.yaml | 5 ++ ...lu_full_sw_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_sw_high_school_mathematics.yaml | 5 ++ ...lu_full_sw_high_school_microeconomics.yaml | 5 ++ 
...obal_mmlu_full_sw_high_school_physics.yaml | 5 ++ ...l_mmlu_full_sw_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_sw_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_sw_high_school_us_history.yaml | 5 ++ ...mlu_full_sw_high_school_world_history.yaml | 5 ++ .../sw/global_mmlu_full_sw_human_aging.yaml | 5 ++ .../global_mmlu_full_sw_human_sexuality.yaml | 5 ++ ...global_mmlu_full_sw_international_law.yaml | 5 ++ .../sw/global_mmlu_full_sw_jurisprudence.yaml | 5 ++ ...global_mmlu_full_sw_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_sw_machine_learning.yaml | 5 ++ .../sw/global_mmlu_full_sw_management.yaml | 5 ++ .../sw/global_mmlu_full_sw_marketing.yaml | 5 ++ .../global_mmlu_full_sw_medical_genetics.yaml | 5 ++ .../sw/global_mmlu_full_sw_miscellaneous.yaml | 5 ++ .../global_mmlu_full_sw_moral_disputes.yaml | 5 ++ .../global_mmlu_full_sw_moral_scenarios.yaml | 5 ++ .../sw/global_mmlu_full_sw_nutrition.yaml | 5 ++ .../sw/global_mmlu_full_sw_philosophy.yaml | 5 ++ .../sw/global_mmlu_full_sw_prehistory.yaml | 5 ++ ..._mmlu_full_sw_professional_accounting.yaml | 5 ++ .../global_mmlu_full_sw_professional_law.yaml | 5 ++ ...al_mmlu_full_sw_professional_medicine.yaml | 5 ++ ..._mmlu_full_sw_professional_psychology.yaml | 5 ++ .../global_mmlu_full_sw_public_relations.yaml | 5 ++ .../global_mmlu_full_sw_security_studies.yaml | 5 ++ .../sw/global_mmlu_full_sw_sociology.yaml | 5 ++ ...global_mmlu_full_sw_us_foreign_policy.yaml | 5 ++ .../full/sw/global_mmlu_full_sw_virology.yaml | 5 ++ .../global_mmlu_full_sw_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/sw/utils.py | 73 +++++++++++++++++++ .../full/te/_global_mmlu_full_te.yaml | 11 +++ .../te/_global_mmlu_full_te_humanities.yaml | 8 ++ .../full/te/_global_mmlu_full_te_other.yaml | 8 ++ .../_global_mmlu_full_te_social_sciences.yaml | 8 ++ .../full/te/_global_mmlu_full_te_stem.yaml | 8 ++ .../global_mmlu/full/te/_te_template_yaml | 16 ++++ .../global_mmlu_full_te_abstract_algebra.yaml | 5 ++ 
.../full/te/global_mmlu_full_te_anatomy.yaml | 5 ++ .../te/global_mmlu_full_te_astronomy.yaml | 5 ++ .../global_mmlu_full_te_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_te_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_te_college_biology.yaml | 5 ++ ...global_mmlu_full_te_college_chemistry.yaml | 5 ++ ...mmlu_full_te_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_te_college_mathematics.yaml | 5 ++ .../global_mmlu_full_te_college_medicine.yaml | 5 ++ .../global_mmlu_full_te_college_physics.yaml | 5 ++ ...global_mmlu_full_te_computer_security.yaml | 5 ++ ...lobal_mmlu_full_te_conceptual_physics.yaml | 5 ++ .../te/global_mmlu_full_te_econometrics.yaml | 5 ++ ...l_mmlu_full_te_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_te_elementary_mathematics.yaml | 5 ++ .../te/global_mmlu_full_te_formal_logic.yaml | 5 ++ .../te/global_mmlu_full_te_global_facts.yaml | 5 ++ ...obal_mmlu_full_te_high_school_biology.yaml | 5 ++ ...al_mmlu_full_te_high_school_chemistry.yaml | 5 ++ ..._full_te_high_school_computer_science.yaml | 5 ++ ..._full_te_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_te_high_school_geography.yaml | 5 ++ ...e_high_school_government_and_politics.yaml | 5 ++ ...lu_full_te_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_te_high_school_mathematics.yaml | 5 ++ ...lu_full_te_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_te_high_school_physics.yaml | 5 ++ ...l_mmlu_full_te_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_te_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_te_high_school_us_history.yaml | 5 ++ ...mlu_full_te_high_school_world_history.yaml | 5 ++ .../te/global_mmlu_full_te_human_aging.yaml | 5 ++ .../global_mmlu_full_te_human_sexuality.yaml | 5 ++ ...global_mmlu_full_te_international_law.yaml | 5 ++ .../te/global_mmlu_full_te_jurisprudence.yaml | 5 ++ ...global_mmlu_full_te_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_te_machine_learning.yaml | 5 ++ .../te/global_mmlu_full_te_management.yaml | 5 ++ 
.../te/global_mmlu_full_te_marketing.yaml | 5 ++ .../global_mmlu_full_te_medical_genetics.yaml | 5 ++ .../te/global_mmlu_full_te_miscellaneous.yaml | 5 ++ .../global_mmlu_full_te_moral_disputes.yaml | 5 ++ .../global_mmlu_full_te_moral_scenarios.yaml | 5 ++ .../te/global_mmlu_full_te_nutrition.yaml | 5 ++ .../te/global_mmlu_full_te_philosophy.yaml | 5 ++ .../te/global_mmlu_full_te_prehistory.yaml | 5 ++ ..._mmlu_full_te_professional_accounting.yaml | 5 ++ .../global_mmlu_full_te_professional_law.yaml | 5 ++ ...al_mmlu_full_te_professional_medicine.yaml | 5 ++ ..._mmlu_full_te_professional_psychology.yaml | 5 ++ .../global_mmlu_full_te_public_relations.yaml | 5 ++ .../global_mmlu_full_te_security_studies.yaml | 5 ++ .../te/global_mmlu_full_te_sociology.yaml | 5 ++ ...global_mmlu_full_te_us_foreign_policy.yaml | 5 ++ .../full/te/global_mmlu_full_te_virology.yaml | 5 ++ .../global_mmlu_full_te_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/te/utils.py | 73 +++++++++++++++++++ .../full/tr/_global_mmlu_full_tr.yaml | 11 +++ .../tr/_global_mmlu_full_tr_humanities.yaml | 8 ++ .../full/tr/_global_mmlu_full_tr_other.yaml | 8 ++ .../_global_mmlu_full_tr_social_sciences.yaml | 8 ++ .../full/tr/_global_mmlu_full_tr_stem.yaml | 8 ++ .../global_mmlu/full/tr/_tr_template_yaml | 16 ++++ .../global_mmlu_full_tr_abstract_algebra.yaml | 5 ++ .../full/tr/global_mmlu_full_tr_anatomy.yaml | 5 ++ .../tr/global_mmlu_full_tr_astronomy.yaml | 5 ++ .../global_mmlu_full_tr_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_tr_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_tr_college_biology.yaml | 5 ++ ...global_mmlu_full_tr_college_chemistry.yaml | 5 ++ ...mmlu_full_tr_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_tr_college_mathematics.yaml | 5 ++ .../global_mmlu_full_tr_college_medicine.yaml | 5 ++ .../global_mmlu_full_tr_college_physics.yaml | 5 ++ ...global_mmlu_full_tr_computer_security.yaml | 5 ++ ...lobal_mmlu_full_tr_conceptual_physics.yaml | 5 ++ 
.../tr/global_mmlu_full_tr_econometrics.yaml | 5 ++ ...l_mmlu_full_tr_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_tr_elementary_mathematics.yaml | 5 ++ .../tr/global_mmlu_full_tr_formal_logic.yaml | 5 ++ .../tr/global_mmlu_full_tr_global_facts.yaml | 5 ++ ...obal_mmlu_full_tr_high_school_biology.yaml | 5 ++ ...al_mmlu_full_tr_high_school_chemistry.yaml | 5 ++ ..._full_tr_high_school_computer_science.yaml | 5 ++ ..._full_tr_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_tr_high_school_geography.yaml | 5 ++ ...r_high_school_government_and_politics.yaml | 5 ++ ...lu_full_tr_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_tr_high_school_mathematics.yaml | 5 ++ ...lu_full_tr_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_tr_high_school_physics.yaml | 5 ++ ...l_mmlu_full_tr_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_tr_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_tr_high_school_us_history.yaml | 5 ++ ...mlu_full_tr_high_school_world_history.yaml | 5 ++ .../tr/global_mmlu_full_tr_human_aging.yaml | 5 ++ .../global_mmlu_full_tr_human_sexuality.yaml | 5 ++ ...global_mmlu_full_tr_international_law.yaml | 5 ++ .../tr/global_mmlu_full_tr_jurisprudence.yaml | 5 ++ ...global_mmlu_full_tr_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_tr_machine_learning.yaml | 5 ++ .../tr/global_mmlu_full_tr_management.yaml | 5 ++ .../tr/global_mmlu_full_tr_marketing.yaml | 5 ++ .../global_mmlu_full_tr_medical_genetics.yaml | 5 ++ .../tr/global_mmlu_full_tr_miscellaneous.yaml | 5 ++ .../global_mmlu_full_tr_moral_disputes.yaml | 5 ++ .../global_mmlu_full_tr_moral_scenarios.yaml | 5 ++ .../tr/global_mmlu_full_tr_nutrition.yaml | 5 ++ .../tr/global_mmlu_full_tr_philosophy.yaml | 5 ++ .../tr/global_mmlu_full_tr_prehistory.yaml | 5 ++ ..._mmlu_full_tr_professional_accounting.yaml | 5 ++ .../global_mmlu_full_tr_professional_law.yaml | 5 ++ ...al_mmlu_full_tr_professional_medicine.yaml | 5 ++ ..._mmlu_full_tr_professional_psychology.yaml | 5 ++ 
.../global_mmlu_full_tr_public_relations.yaml | 5 ++ .../global_mmlu_full_tr_security_studies.yaml | 5 ++ .../tr/global_mmlu_full_tr_sociology.yaml | 5 ++ ...global_mmlu_full_tr_us_foreign_policy.yaml | 5 ++ .../full/tr/global_mmlu_full_tr_virology.yaml | 5 ++ .../global_mmlu_full_tr_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/tr/utils.py | 73 +++++++++++++++++++ .../full/uk/_global_mmlu_full_uk.yaml | 11 +++ .../uk/_global_mmlu_full_uk_humanities.yaml | 8 ++ .../full/uk/_global_mmlu_full_uk_other.yaml | 8 ++ .../_global_mmlu_full_uk_social_sciences.yaml | 8 ++ .../full/uk/_global_mmlu_full_uk_stem.yaml | 8 ++ .../global_mmlu/full/uk/_uk_template_yaml | 16 ++++ .../global_mmlu_full_uk_abstract_algebra.yaml | 5 ++ .../full/uk/global_mmlu_full_uk_anatomy.yaml | 5 ++ .../uk/global_mmlu_full_uk_astronomy.yaml | 5 ++ .../global_mmlu_full_uk_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_uk_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_uk_college_biology.yaml | 5 ++ ...global_mmlu_full_uk_college_chemistry.yaml | 5 ++ ...mmlu_full_uk_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_uk_college_mathematics.yaml | 5 ++ .../global_mmlu_full_uk_college_medicine.yaml | 5 ++ .../global_mmlu_full_uk_college_physics.yaml | 5 ++ ...global_mmlu_full_uk_computer_security.yaml | 5 ++ ...lobal_mmlu_full_uk_conceptual_physics.yaml | 5 ++ .../uk/global_mmlu_full_uk_econometrics.yaml | 5 ++ ...l_mmlu_full_uk_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_uk_elementary_mathematics.yaml | 5 ++ .../uk/global_mmlu_full_uk_formal_logic.yaml | 5 ++ .../uk/global_mmlu_full_uk_global_facts.yaml | 5 ++ ...obal_mmlu_full_uk_high_school_biology.yaml | 5 ++ ...al_mmlu_full_uk_high_school_chemistry.yaml | 5 ++ ..._full_uk_high_school_computer_science.yaml | 5 ++ ..._full_uk_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_uk_high_school_geography.yaml | 5 ++ ...k_high_school_government_and_politics.yaml | 5 ++ ...lu_full_uk_high_school_macroeconomics.yaml | 5 ++ 
..._mmlu_full_uk_high_school_mathematics.yaml | 5 ++ ...lu_full_uk_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_uk_high_school_physics.yaml | 5 ++ ...l_mmlu_full_uk_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_uk_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_uk_high_school_us_history.yaml | 5 ++ ...mlu_full_uk_high_school_world_history.yaml | 5 ++ .../uk/global_mmlu_full_uk_human_aging.yaml | 5 ++ .../global_mmlu_full_uk_human_sexuality.yaml | 5 ++ ...global_mmlu_full_uk_international_law.yaml | 5 ++ .../uk/global_mmlu_full_uk_jurisprudence.yaml | 5 ++ ...global_mmlu_full_uk_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_uk_machine_learning.yaml | 5 ++ .../uk/global_mmlu_full_uk_management.yaml | 5 ++ .../uk/global_mmlu_full_uk_marketing.yaml | 5 ++ .../global_mmlu_full_uk_medical_genetics.yaml | 5 ++ .../uk/global_mmlu_full_uk_miscellaneous.yaml | 5 ++ .../global_mmlu_full_uk_moral_disputes.yaml | 5 ++ .../global_mmlu_full_uk_moral_scenarios.yaml | 5 ++ .../uk/global_mmlu_full_uk_nutrition.yaml | 5 ++ .../uk/global_mmlu_full_uk_philosophy.yaml | 5 ++ .../uk/global_mmlu_full_uk_prehistory.yaml | 5 ++ ..._mmlu_full_uk_professional_accounting.yaml | 5 ++ .../global_mmlu_full_uk_professional_law.yaml | 5 ++ ...al_mmlu_full_uk_professional_medicine.yaml | 5 ++ ..._mmlu_full_uk_professional_psychology.yaml | 5 ++ .../global_mmlu_full_uk_public_relations.yaml | 5 ++ .../global_mmlu_full_uk_security_studies.yaml | 5 ++ .../uk/global_mmlu_full_uk_sociology.yaml | 5 ++ ...global_mmlu_full_uk_us_foreign_policy.yaml | 5 ++ .../full/uk/global_mmlu_full_uk_virology.yaml | 5 ++ .../global_mmlu_full_uk_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/uk/utils.py | 73 +++++++++++++++++++ .../full/vi/_global_mmlu_full_vi.yaml | 11 +++ .../vi/_global_mmlu_full_vi_humanities.yaml | 8 ++ .../full/vi/_global_mmlu_full_vi_other.yaml | 8 ++ .../_global_mmlu_full_vi_social_sciences.yaml | 8 ++ .../full/vi/_global_mmlu_full_vi_stem.yaml | 8 ++ 
.../global_mmlu/full/vi/_vi_template_yaml | 16 ++++ .../global_mmlu_full_vi_abstract_algebra.yaml | 5 ++ .../full/vi/global_mmlu_full_vi_anatomy.yaml | 5 ++ .../vi/global_mmlu_full_vi_astronomy.yaml | 5 ++ .../global_mmlu_full_vi_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_vi_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_vi_college_biology.yaml | 5 ++ ...global_mmlu_full_vi_college_chemistry.yaml | 5 ++ ...mmlu_full_vi_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_vi_college_mathematics.yaml | 5 ++ .../global_mmlu_full_vi_college_medicine.yaml | 5 ++ .../global_mmlu_full_vi_college_physics.yaml | 5 ++ ...global_mmlu_full_vi_computer_security.yaml | 5 ++ ...lobal_mmlu_full_vi_conceptual_physics.yaml | 5 ++ .../vi/global_mmlu_full_vi_econometrics.yaml | 5 ++ ...l_mmlu_full_vi_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_vi_elementary_mathematics.yaml | 5 ++ .../vi/global_mmlu_full_vi_formal_logic.yaml | 5 ++ .../vi/global_mmlu_full_vi_global_facts.yaml | 5 ++ ...obal_mmlu_full_vi_high_school_biology.yaml | 5 ++ ...al_mmlu_full_vi_high_school_chemistry.yaml | 5 ++ ..._full_vi_high_school_computer_science.yaml | 5 ++ ..._full_vi_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_vi_high_school_geography.yaml | 5 ++ ...i_high_school_government_and_politics.yaml | 5 ++ ...lu_full_vi_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_vi_high_school_mathematics.yaml | 5 ++ ...lu_full_vi_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_vi_high_school_physics.yaml | 5 ++ ...l_mmlu_full_vi_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_vi_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_vi_high_school_us_history.yaml | 5 ++ ...mlu_full_vi_high_school_world_history.yaml | 5 ++ .../vi/global_mmlu_full_vi_human_aging.yaml | 5 ++ .../global_mmlu_full_vi_human_sexuality.yaml | 5 ++ ...global_mmlu_full_vi_international_law.yaml | 5 ++ .../vi/global_mmlu_full_vi_jurisprudence.yaml | 5 ++ ...global_mmlu_full_vi_logical_fallacies.yaml | 5 ++ 
.../global_mmlu_full_vi_machine_learning.yaml | 5 ++ .../vi/global_mmlu_full_vi_management.yaml | 5 ++ .../vi/global_mmlu_full_vi_marketing.yaml | 5 ++ .../global_mmlu_full_vi_medical_genetics.yaml | 5 ++ .../vi/global_mmlu_full_vi_miscellaneous.yaml | 5 ++ .../global_mmlu_full_vi_moral_disputes.yaml | 5 ++ .../global_mmlu_full_vi_moral_scenarios.yaml | 5 ++ .../vi/global_mmlu_full_vi_nutrition.yaml | 5 ++ .../vi/global_mmlu_full_vi_philosophy.yaml | 5 ++ .../vi/global_mmlu_full_vi_prehistory.yaml | 5 ++ ..._mmlu_full_vi_professional_accounting.yaml | 5 ++ .../global_mmlu_full_vi_professional_law.yaml | 5 ++ ...al_mmlu_full_vi_professional_medicine.yaml | 5 ++ ..._mmlu_full_vi_professional_psychology.yaml | 5 ++ .../global_mmlu_full_vi_public_relations.yaml | 5 ++ .../global_mmlu_full_vi_security_studies.yaml | 5 ++ .../vi/global_mmlu_full_vi_sociology.yaml | 5 ++ ...global_mmlu_full_vi_us_foreign_policy.yaml | 5 ++ .../full/vi/global_mmlu_full_vi_virology.yaml | 5 ++ .../global_mmlu_full_vi_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/vi/utils.py | 73 +++++++++++++++++++ .../full/yo/_global_mmlu_full_yo.yaml | 11 +++ .../yo/_global_mmlu_full_yo_humanities.yaml | 8 ++ .../full/yo/_global_mmlu_full_yo_other.yaml | 8 ++ .../_global_mmlu_full_yo_social_sciences.yaml | 8 ++ .../full/yo/_global_mmlu_full_yo_stem.yaml | 8 ++ .../global_mmlu/full/yo/_yo_template_yaml | 16 ++++ .../global_mmlu_full_yo_abstract_algebra.yaml | 5 ++ .../full/yo/global_mmlu_full_yo_anatomy.yaml | 5 ++ .../yo/global_mmlu_full_yo_astronomy.yaml | 5 ++ .../global_mmlu_full_yo_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_yo_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_yo_college_biology.yaml | 5 ++ ...global_mmlu_full_yo_college_chemistry.yaml | 5 ++ ...mmlu_full_yo_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_yo_college_mathematics.yaml | 5 ++ .../global_mmlu_full_yo_college_medicine.yaml | 5 ++ .../global_mmlu_full_yo_college_physics.yaml | 5 ++ 
...global_mmlu_full_yo_computer_security.yaml | 5 ++ ...lobal_mmlu_full_yo_conceptual_physics.yaml | 5 ++ .../yo/global_mmlu_full_yo_econometrics.yaml | 5 ++ ...l_mmlu_full_yo_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_yo_elementary_mathematics.yaml | 5 ++ .../yo/global_mmlu_full_yo_formal_logic.yaml | 5 ++ .../yo/global_mmlu_full_yo_global_facts.yaml | 5 ++ ...obal_mmlu_full_yo_high_school_biology.yaml | 5 ++ ...al_mmlu_full_yo_high_school_chemistry.yaml | 5 ++ ..._full_yo_high_school_computer_science.yaml | 5 ++ ..._full_yo_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_yo_high_school_geography.yaml | 5 ++ ...o_high_school_government_and_politics.yaml | 5 ++ ...lu_full_yo_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_yo_high_school_mathematics.yaml | 5 ++ ...lu_full_yo_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_yo_high_school_physics.yaml | 5 ++ ...l_mmlu_full_yo_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_yo_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_yo_high_school_us_history.yaml | 5 ++ ...mlu_full_yo_high_school_world_history.yaml | 5 ++ .../yo/global_mmlu_full_yo_human_aging.yaml | 5 ++ .../global_mmlu_full_yo_human_sexuality.yaml | 5 ++ ...global_mmlu_full_yo_international_law.yaml | 5 ++ .../yo/global_mmlu_full_yo_jurisprudence.yaml | 5 ++ ...global_mmlu_full_yo_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_yo_machine_learning.yaml | 5 ++ .../yo/global_mmlu_full_yo_management.yaml | 5 ++ .../yo/global_mmlu_full_yo_marketing.yaml | 5 ++ .../global_mmlu_full_yo_medical_genetics.yaml | 5 ++ .../yo/global_mmlu_full_yo_miscellaneous.yaml | 5 ++ .../global_mmlu_full_yo_moral_disputes.yaml | 5 ++ .../global_mmlu_full_yo_moral_scenarios.yaml | 5 ++ .../yo/global_mmlu_full_yo_nutrition.yaml | 5 ++ .../yo/global_mmlu_full_yo_philosophy.yaml | 5 ++ .../yo/global_mmlu_full_yo_prehistory.yaml | 5 ++ ..._mmlu_full_yo_professional_accounting.yaml | 5 ++ .../global_mmlu_full_yo_professional_law.yaml | 5 ++ 
...al_mmlu_full_yo_professional_medicine.yaml | 5 ++ ..._mmlu_full_yo_professional_psychology.yaml | 5 ++ .../global_mmlu_full_yo_public_relations.yaml | 5 ++ .../global_mmlu_full_yo_security_studies.yaml | 5 ++ .../yo/global_mmlu_full_yo_sociology.yaml | 5 ++ ...global_mmlu_full_yo_us_foreign_policy.yaml | 5 ++ .../full/yo/global_mmlu_full_yo_virology.yaml | 5 ++ .../global_mmlu_full_yo_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/yo/utils.py | 73 +++++++++++++++++++ .../full/zh/_global_mmlu_full_zh.yaml | 11 +++ .../zh/_global_mmlu_full_zh_humanities.yaml | 8 ++ .../full/zh/_global_mmlu_full_zh_other.yaml | 8 ++ .../_global_mmlu_full_zh_social_sciences.yaml | 8 ++ .../full/zh/_global_mmlu_full_zh_stem.yaml | 8 ++ .../global_mmlu/full/zh/_zh_template_yaml | 16 ++++ .../global_mmlu_full_zh_abstract_algebra.yaml | 5 ++ .../full/zh/global_mmlu_full_zh_anatomy.yaml | 5 ++ .../zh/global_mmlu_full_zh_astronomy.yaml | 5 ++ .../global_mmlu_full_zh_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_zh_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_zh_college_biology.yaml | 5 ++ ...global_mmlu_full_zh_college_chemistry.yaml | 5 ++ ...mmlu_full_zh_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_zh_college_mathematics.yaml | 5 ++ .../global_mmlu_full_zh_college_medicine.yaml | 5 ++ .../global_mmlu_full_zh_college_physics.yaml | 5 ++ ...global_mmlu_full_zh_computer_security.yaml | 5 ++ ...lobal_mmlu_full_zh_conceptual_physics.yaml | 5 ++ .../zh/global_mmlu_full_zh_econometrics.yaml | 5 ++ ...l_mmlu_full_zh_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_zh_elementary_mathematics.yaml | 5 ++ .../zh/global_mmlu_full_zh_formal_logic.yaml | 5 ++ .../zh/global_mmlu_full_zh_global_facts.yaml | 5 ++ ...obal_mmlu_full_zh_high_school_biology.yaml | 5 ++ ...al_mmlu_full_zh_high_school_chemistry.yaml | 5 ++ ..._full_zh_high_school_computer_science.yaml | 5 ++ ..._full_zh_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_zh_high_school_geography.yaml | 5 ++ 
...h_high_school_government_and_politics.yaml | 5 ++ ...lu_full_zh_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_zh_high_school_mathematics.yaml | 5 ++ ...lu_full_zh_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_zh_high_school_physics.yaml | 5 ++ ...l_mmlu_full_zh_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_zh_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_zh_high_school_us_history.yaml | 5 ++ ...mlu_full_zh_high_school_world_history.yaml | 5 ++ .../zh/global_mmlu_full_zh_human_aging.yaml | 5 ++ .../global_mmlu_full_zh_human_sexuality.yaml | 5 ++ ...global_mmlu_full_zh_international_law.yaml | 5 ++ .../zh/global_mmlu_full_zh_jurisprudence.yaml | 5 ++ ...global_mmlu_full_zh_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_zh_machine_learning.yaml | 5 ++ .../zh/global_mmlu_full_zh_management.yaml | 5 ++ .../zh/global_mmlu_full_zh_marketing.yaml | 5 ++ .../global_mmlu_full_zh_medical_genetics.yaml | 5 ++ .../zh/global_mmlu_full_zh_miscellaneous.yaml | 5 ++ .../global_mmlu_full_zh_moral_disputes.yaml | 5 ++ .../global_mmlu_full_zh_moral_scenarios.yaml | 5 ++ .../zh/global_mmlu_full_zh_nutrition.yaml | 5 ++ .../zh/global_mmlu_full_zh_philosophy.yaml | 5 ++ .../zh/global_mmlu_full_zh_prehistory.yaml | 5 ++ ..._mmlu_full_zh_professional_accounting.yaml | 5 ++ .../global_mmlu_full_zh_professional_law.yaml | 5 ++ ...al_mmlu_full_zh_professional_medicine.yaml | 5 ++ ..._mmlu_full_zh_professional_psychology.yaml | 5 ++ .../global_mmlu_full_zh_public_relations.yaml | 5 ++ .../global_mmlu_full_zh_security_studies.yaml | 5 ++ .../zh/global_mmlu_full_zh_sociology.yaml | 5 ++ ...global_mmlu_full_zh_us_foreign_policy.yaml | 5 ++ .../full/zh/global_mmlu_full_zh_virology.yaml | 5 ++ .../global_mmlu_full_zh_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/zh/utils.py | 73 +++++++++++++++++++ 2706 files changed, 17532 insertions(+), 1 deletion(-) rename lm_eval/tasks/global_mmlu/{ => default}/_default_yaml (100%) rename lm_eval/tasks/global_mmlu/{ 
=> default}/_generate_configs.py (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_ar.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_bn.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_de.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_en.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_es.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_fr.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_hi.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_id.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_it.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_ja.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_ko.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_pt.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_sw.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_yo.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_zh.yaml (100%) create mode 100644 lm_eval/tasks/global_mmlu/full/am/_am_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_ar_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/de/_de_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/el/_el_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/en/_en_template_yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/en/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/es/_es_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_he_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_id_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_it_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/nl/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_si_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_so_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_te_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/utils.py diff --git a/lm_eval/tasks/global_mmlu/README.md b/lm_eval/tasks/global_mmlu/README.md index 838a7c9d..d1514102 100644 --- a/lm_eval/tasks/global_mmlu/README.md +++ 
b/lm_eval/tasks/global_mmlu/README.md @@ -6,9 +6,26 @@ Title: `Global MMLU: Understanding and Addressing Cultural and Linguistic Biases Abstract: [https://arxiv.org/abs/2412.03304](https://arxiv.org/abs/2412.03304) +Global-MMLU 🌍 is a multilingual evaluation set spanning 42 languages, including English. This dataset combines machine translations for MMLU questions along with professional translations and crowd-sourced post-edits. It also includes cultural sensitivity annotations for a subset of the questions (2850 questions per language) and classifies them as Culturally Sensitive (CS) 🗽 or Culturally Agnostic (CA) ⚖️. These annotations were collected as part of an open science initiative led by Cohere For AI in collaboration with many external collaborators from both industry and academia. + Global-MMLU-Lite is a balanced collection of culturally sensitive and culturally agnostic MMLU tasks. It is designed for efficient evaluation of multilingual models in 15 languages (including English). Only languages with human translations and post-edits in the original [Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍 dataset have been included in the lite version. -Homepage: [https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite](https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite) +Homepage: \ +[https://huggingface.co/datasets/CohereForAI/Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) \ +[https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite](https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite) + + +#### Groups + +* `global_mmlu_{lang}`: This group uses the `Global-MMLU-Lite` benchmark, which supports 15 languages (including English). +* `global_mmlu_full_{lang}`: This group uses the `Global-MMLU` benchmark, which supports 42 languages (including English). 
+ +#### Subgroups (supported only for the `full` version) + +* `global_mmlu_full_{lang}_stem` +* `global_mmlu_full_{lang}_humanities` +* `global_mmlu_full_{lang}_social_sciences` +* `global_mmlu_full_{lang}_other` ### Citation diff --git a/lm_eval/tasks/global_mmlu/_default_yaml b/lm_eval/tasks/global_mmlu/default/_default_yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/_default_yaml rename to lm_eval/tasks/global_mmlu/default/_default_yaml diff --git a/lm_eval/tasks/global_mmlu/_generate_configs.py b/lm_eval/tasks/global_mmlu/default/_generate_configs.py similarity index 100% rename from lm_eval/tasks/global_mmlu/_generate_configs.py rename to lm_eval/tasks/global_mmlu/default/_generate_configs.py diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_bn.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_de.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_en.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_es.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml diff --git 
a/lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_id.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_it.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_sw.yaml 
rename to lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml diff --git a/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml b/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml new file mode 100644 index 00000000..f52152bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: am +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml new file mode 100644 index 00000000..48fc270a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_am +task: + - global_mmlu_full_am_stem + - global_mmlu_full_am_other + - global_mmlu_full_am_social_sciences + - global_mmlu_full_am_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml new file mode 100644 index 00000000..e250d14c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_humanities +task: + - global_mmlu_full_am_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml new file mode 100644 index 00000000..4b5151ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_other +task: + - global_mmlu_full_am_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml new file mode 100644 index 00000000..f0fbcc1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml @@ -0,0 +1,8 @@ +group: 
global_mmlu_full_am_social_sciences +task: + - global_mmlu_full_am_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml new file mode 100644 index 00000000..b67dfdb7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_stem +task: + - global_mmlu_full_am_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml new file mode 100644 index 00000000..06a70dd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml new file mode 100644 index 00000000..7914c3b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml new file mode 100644 index 00000000..4e7e2a04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_am_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml new file mode 100644 index 00000000..a98a9597 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml new file mode 100644 index 00000000..4c25627f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml new file mode 100644 index 00000000..a8b6661b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml new file mode 100644 index 00000000..b0d2d2a8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml new file mode 100644 index 00000000..b5c52a82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml new file mode 100644 index 00000000..0b73422e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml new file mode 100644 index 00000000..bd36f40f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml new file mode 100644 index 00000000..009fdc1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml new file mode 100644 index 00000000..3df6247b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml new file mode 100644 index 00000000..4115ea02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml new file mode 100644 index 00000000..87dd12ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml new file mode 100644 index 00000000..d8f72619 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml new file mode 100644 index 00000000..455563f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml new file mode 100644 index 00000000..5c5babd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml new file mode 100644 index 00000000..b59d47e4 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml new file mode 100644 index 00000000..680d4eca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml new file mode 100644 index 00000000..96af7940 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml new file mode 100644 index 00000000..6cd19227 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml new file mode 100644 index 00000000..e0249142 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml new file mode 100644 index 00000000..b4925a54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml new file mode 100644 index 00000000..d63f1d35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..3c8a0ea6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml new file mode 100644 index 00000000..76a8c3d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml new file mode 100644 index 00000000..1acbf4e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml new file mode 100644 index 00000000..dcfd9bb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml new file mode 100644 index 00000000..2dd64dc1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml new file mode 100644 index 00000000..a523f443 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml new file mode 100644 index 00000000..ce233f44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml new file mode 100644 index 00000000..20aeca5e 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml new file mode 100644 index 00000000..18e95e40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml new file mode 100644 index 00000000..140f2329 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml new file mode 100644 index 00000000..10a2d638 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml new file mode 100644 index 00000000..cd982742 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml new file mode 100644 index 00000000..2faf735c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml new file mode 100644 index 00000000..7f5c8e9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml new file mode 100644 index 00000000..08d080a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_management diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml new file mode 100644 index 00000000..52b4f7c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_marketing diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml new file mode 100644 index 00000000..32bd2432 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml new file mode 100644 index 00000000..ed5d610d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml new file mode 100644 index 00000000..bddaebc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml new file mode 100644 index 00000000..fda69f31 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml new file mode 100644 index 00000000..bb0cb08b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml new file mode 100644 index 00000000..484c015e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml new file mode 100644 index 00000000..6e104f48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml new file mode 100644 index 00000000..50c9fe50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml new file mode 100644 index 00000000..df2cf26c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml new file mode 100644 index 00000000..c2860528 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml new file mode 100644 index 00000000..8562a28d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml new file mode 100644 index 00000000..5cb3186c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml new file mode 100644 index 00000000..6aa8575e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml new file mode 100644 index 00000000..60005bab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_sociology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_us_foreign_policy.yaml new file mode 100644 index 00000000..374fb14a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_virology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_virology.yaml new file mode 100644 index 00000000..9f235299 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_virology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_world_religions.yaml new file mode 100644 index 00000000..c169a048 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/am/utils.py b/lm_eval/tasks/global_mmlu/full/am/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ar/_ar_template_yaml b/lm_eval/tasks/global_mmlu/full/ar/_ar_template_yaml new file mode 100644 index 00000000..768bb7f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_ar_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ar +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml new file mode 100644 index 00000000..61f60b9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ar +task: + - global_mmlu_full_ar_stem + - global_mmlu_full_ar_other + - global_mmlu_full_ar_social_sciences + - global_mmlu_full_ar_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml new file mode 100644 index 00000000..cfa6d80a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_humanities +task: + - global_mmlu_full_ar_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml new file mode 100644 index 00000000..26603f33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_other +task: + - global_mmlu_full_ar_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml new file mode 100644 index 00000000..aca95bc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_ar_social_sciences +task: + - global_mmlu_full_ar_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml new file mode 100644 index 00000000..b91e6c9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_stem +task: + - global_mmlu_full_ar_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml new file mode 100644 index 00000000..1f044b04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml new file mode 100644 index 00000000..cd5d0963 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml new file mode 100644 index 00000000..d21c00b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml new file mode 100644 index 00000000..a73f5f2d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml new file mode 100644 index 00000000..a9c3d078 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml new file mode 100644 index 00000000..6fba6a1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml new file mode 100644 index 00000000..386ba52d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml new file mode 100644 index 00000000..9b846715 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml new file mode 100644 index 00000000..c8d8d090 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml new file mode 100644 index 00000000..b988cfee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml new file mode 100644 index 00000000..008a39dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml new file mode 100644 index 00000000..34a93535 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml new file mode 100644 index 00000000..ea20efa5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml new file mode 100644 index 00000000..3a757901 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml new file mode 100644 index 00000000..31a4e22e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml new file mode 100644 index 00000000..25f4adb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml new file mode 100644 index 00000000..b2792d56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml new file mode 100644 index 00000000..af1bf60b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml new file mode 100644 index 00000000..8f7eaff7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml new file mode 100644 index 00000000..8f56395b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml new file mode 100644 index 00000000..6e388aed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml new file mode 100644 index 00000000..741584c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml new file mode 100644 index 00000000..3c376967 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml new file mode 100644 index 00000000..c71ada9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..0b5f3267 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml new file mode 100644 index 00000000..cb259ac2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml new file mode 100644 index 00000000..c4ab308b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml new file mode 100644 index 00000000..68180e5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml new file mode 100644 index 00000000..e727ad09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml new file mode 100644 index 00000000..8ff9dd0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml new file mode 100644 index 00000000..668991cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml new file mode 100644 index 00000000..1df9a553 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml new file mode 100644 index 00000000..515a40f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml new file mode 100644 index 00000000..24caceac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml new file mode 100644 index 00000000..a5aee4b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml new file mode 100644 index 00000000..37781208 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml new file mode 100644 index 00000000..4365730e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml new file mode 100644 index 00000000..e1fc86e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml new file mode 100644 index 00000000..4dc7c8c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_management diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml new file mode 100644 index 00000000..371fb521 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml new file mode 100644 index 00000000..c080b645 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml new file mode 100644 index 00000000..7d593ecb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml new file mode 100644 index 00000000..4021a93e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml new file mode 100644 index 00000000..f09edd00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml new file mode 100644 index 00000000..8d8577cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml new file mode 100644 index 00000000..733b77ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml new file mode 100644 index 00000000..4d1bf141 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml new file mode 100644 index 00000000..45b07299 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml new file mode 100644 index 00000000..6e33b583 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml new file mode 100644 index 00000000..4cd0a17a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml new file mode 100644 index 00000000..f035162d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml new file mode 100644 index 00000000..3d4dd34f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml new file mode 100644 index 00000000..f2245b52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml new file mode 100644 index 00000000..dd920305 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml new file mode 100644 index 00000000..0f38b855 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml new file mode 100644 index 00000000..f3be1f8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_virology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml new file mode 100644 index 00000000..7c7f01a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ar/utils.py b/lm_eval/tasks/global_mmlu/full/ar/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml b/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml new file mode 100644 index 00000000..f388063d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: bn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml new file mode 100644 index 00000000..f1c91f09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_bn +task: + - global_mmlu_full_bn_stem + - global_mmlu_full_bn_other + - global_mmlu_full_bn_social_sciences + - global_mmlu_full_bn_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml new file mode 100644 index 00000000..acd1ab01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_humanities +task: + - global_mmlu_full_bn_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml new file mode 100644 index 00000000..d2160298 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_other +task: + - global_mmlu_full_bn_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml new file mode 100644 index 00000000..c359b359 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_bn_social_sciences +task: + - global_mmlu_full_bn_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml new file mode 100644 index 00000000..2c78c4ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_stem +task: + - global_mmlu_full_bn_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml new file mode 100644 index 00000000..5bb7bb61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml new file mode 100644 index 00000000..d49070f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml new file mode 100644 index 00000000..2e6dbc97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml new file mode 100644 index 00000000..8c45a0e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml new file mode 100644 index 00000000..97e17570 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml new file mode 100644 index 00000000..9bf0b34c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml new file mode 100644 index 00000000..cb5a2600 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml new file mode 100644 index 00000000..ecd60e54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml new file mode 100644 index 00000000..5fb69d57 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml new file mode 100644 index 00000000..442045f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml new file mode 100644 index 00000000..6849ffbb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml new file mode 100644 index 00000000..184097f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml new file mode 100644 index 00000000..4dc8a2c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml new file mode 100644 index 00000000..941f6355 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml new file mode 100644 index 00000000..5918b08a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml new file mode 100644 index 00000000..8f7d1f10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml new file mode 100644 index 00000000..b54c80db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml new file mode 100644 index 00000000..371d61cd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml new file mode 100644 index 00000000..4f2c8731 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml new file mode 100644 index 00000000..0022c824 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml new file mode 100644 index 00000000..62ed6c6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml new file mode 100644 index 00000000..b9118a11 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml new file mode 100644 index 00000000..a7fa3c1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml new file mode 100644 index 00000000..067ec0ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..12c775ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml new file mode 100644 index 00000000..82809b15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml new file mode 100644 index 00000000..a14eb703 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml new file mode 100644 index 00000000..a84f85fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml new file mode 100644 index 00000000..5b10c59d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml new file mode 100644 index 00000000..f4231ea2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml new file mode 100644 index 00000000..28dbddf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml new file mode 100644 index 00000000..fc8aec7d 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml new file mode 100644 index 00000000..16a3c204 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml new file mode 100644 index 00000000..a25244a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml new file mode 100644 index 00000000..b47f516d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml new file mode 100644 index 00000000..08ab51e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml new file mode 100644 index 00000000..0885a1a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml new file mode 100644 index 00000000..f0eb0997 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml new file mode 100644 index 00000000..d006b411 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_management diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml new file mode 100644 index 00000000..520f9469 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_marketing diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml new file mode 100644 index 00000000..88caa977 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml new file mode 100644 index 00000000..9ce31f7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml new file mode 100644 index 00000000..44403216 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml new file mode 100644 index 00000000..e5f59e15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml new file mode 100644 index 00000000..422bba55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml new file mode 100644 index 00000000..62af532b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml new file mode 100644 index 00000000..dc49d36c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml new file mode 100644 index 00000000..bf72a6a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml new file mode 100644 index 00000000..f49fb142 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml new file mode 100644 index 00000000..3c53d77a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml new file mode 100644 index 00000000..a50c5cbf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml new file mode 100644 index 00000000..00e2742a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml new file mode 100644 index 00000000..5a0e7612 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml new file mode 100644 index 00000000..e8820319 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_sociology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml new file mode 100644 index 00000000..42be796a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml new file mode 100644 index 00000000..3959f006 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_virology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml new file mode 100644 index 00000000..15ee9efc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/bn/utils.py b/lm_eval/tasks/global_mmlu/full/bn/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml b/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml new file mode 100644 index 00000000..ce2189a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: cs +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml new file mode 100644 index 00000000..977b0051 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_cs +task: + - global_mmlu_full_cs_stem + - global_mmlu_full_cs_other + - global_mmlu_full_cs_social_sciences + - global_mmlu_full_cs_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml new file mode 100644 index 00000000..b4b4aff3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_humanities +task: + - global_mmlu_full_cs_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml new file mode 100644 index 00000000..302912e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_other +task: + - global_mmlu_full_cs_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml new file mode 100644 index 00000000..d3fed76e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_cs_social_sciences +task: + - global_mmlu_full_cs_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml new file mode 100644 index 00000000..898bb092 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_stem +task: + - global_mmlu_full_cs_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml new file mode 100644 index 00000000..40431ec9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml new file mode 100644 index 00000000..97d7354d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml new file mode 100644 index 00000000..5b5a5f99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml new file mode 100644 index 00000000..6db79c52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml new file mode 100644 index 00000000..3a17c605 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml new file mode 100644 index 00000000..9c6597b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml new file mode 100644 index 00000000..713af5c3 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml new file mode 100644 index 00000000..fd619d13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml new file mode 100644 index 00000000..e09563f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml new file mode 100644 index 00000000..f7b868c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml new file mode 100644 index 00000000..e98df339 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml new file mode 100644 index 00000000..7256ad67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml new file mode 100644 index 00000000..9bd64498 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml new file mode 100644 index 00000000..c954d320 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml new file mode 100644 index 00000000..2f80e8ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml new file mode 100644 index 00000000..bfbc2c9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml new file mode 100644 index 00000000..0c2ec8bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml new file mode 100644 index 00000000..6302b417 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml new file mode 100644 index 00000000..b69e9ac3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml new file mode 100644 index 00000000..67f53cf5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml new file mode 100644 index 00000000..0be19221 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml new file mode 100644 index 00000000..7fa264c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml new file mode 100644 index 00000000..b9f903c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml new file mode 100644 index 00000000..5bde4d69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..bb5068ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml new file mode 100644 index 00000000..87cb3e57 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml new file mode 100644 index 00000000..33c2e18c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml new file mode 100644 index 00000000..1ed095bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml new file mode 100644 index 00000000..59b62305 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml new file mode 100644 index 00000000..1a18ee25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml new file mode 100644 index 00000000..d8d0a271 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml new file mode 100644 index 00000000..07012306 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml new file mode 100644 index 00000000..e3f5c7c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml new file mode 100644 index 00000000..61d405c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml new file mode 100644 index 00000000..509ebee4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml new file mode 100644 index 00000000..c0e27957 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml new file mode 100644 index 00000000..85010f3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml new file mode 100644 index 00000000..32aaa1a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml new file mode 100644 index 00000000..4e1a3a7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_management diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml new file mode 100644 index 00000000..239e3c0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_marketing diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml new file mode 100644 index 00000000..1c76fee7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml new file mode 100644 index 00000000..4be6207a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml new file mode 100644 index 00000000..b263f67e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml new file mode 100644 index 00000000..6532a43e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml new file mode 100644 index 00000000..3f04fbcd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml new file mode 100644 index 00000000..2f5093f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml new file mode 100644 index 00000000..a8f5f5a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml new file mode 100644 index 00000000..bccb71b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml new file mode 100644 index 00000000..ff50f50c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml new file mode 100644 index 00000000..9b829379 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml new file mode 100644 index 00000000..e41edb29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml new file mode 100644 index 00000000..e8fb512d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml new file mode 100644 index 00000000..64ec0b3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml new file mode 100644 index 00000000..18214f7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_sociology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml new file mode 100644 index 00000000..ac42b097 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml new file mode 100644 index 00000000..a51b8aef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_virology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml new file mode 100644 index 00000000..cf9af3e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/cs/utils.py b/lm_eval/tasks/global_mmlu/full/cs/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml b/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml new file mode 100644 index 00000000..036b8619 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: de +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml new file mode 100644 index 00000000..c09da268 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_de +task: + - global_mmlu_full_de_stem + - global_mmlu_full_de_other + - global_mmlu_full_de_social_sciences + - global_mmlu_full_de_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml new file mode 100644 index 00000000..df571c67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_humanities +task: + - global_mmlu_full_de_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml new file mode 100644 index 00000000..bfff864e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_other +task: + - global_mmlu_full_de_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml new file mode 100644 index 00000000..8cf304a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_de_social_sciences +task: + - global_mmlu_full_de_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml new file mode 100644 index 00000000..75d1aa5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_stem +task: + - global_mmlu_full_de_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml new file mode 100644 index 00000000..07cd2356 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml new file mode 100644 index 00000000..9deb16a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml new file mode 100644 index 00000000..6a743d45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml new file mode 100644 index 00000000..37bf9d45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml new file mode 100644 index 00000000..c5ad878a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml new file mode 100644 index 00000000..200f9239 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml new file mode 100644 index 00000000..2bbc4d46 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml new file mode 100644 index 00000000..ac903e3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml new file mode 100644 index 00000000..616010ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml new file mode 100644 index 00000000..b9648ce8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml new file mode 100644 index 00000000..d3bc6892 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml new file mode 100644 index 00000000..fee01f9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml new file mode 100644 index 00000000..201c17d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml new file mode 100644 index 00000000..1d902c3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml new file mode 100644 index 00000000..8dcb6c48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml new file mode 100644 index 00000000..a1ca41ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml new file mode 100644 index 00000000..6e16729e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml new file mode 100644 index 00000000..a7b09289 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml new file mode 100644 index 00000000..0ad59551 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml new file mode 100644 index 00000000..6c0fbd55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml new file mode 100644 index 00000000..0aea5ada --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml new file mode 100644 index 00000000..97293b49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml new file mode 100644 index 00000000..d26a65d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml new file mode 100644 index 00000000..b6ec78e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..53489d85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml new file mode 100644 index 00000000..44a5666f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml new file mode 100644 index 00000000..3b911297 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml new file mode 100644 index 00000000..8d17d047 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml new file mode 100644 index 00000000..ae768002 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml new file mode 100644 index 00000000..4c272287 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml new file mode 100644 index 00000000..9c1eff81 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml new file mode 100644 index 00000000..11f804a6 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml new file mode 100644 index 00000000..7d5b4d77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml new file mode 100644 index 00000000..b3f09c7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml new file mode 100644 index 00000000..34bb5918 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml new file mode 100644 index 00000000..585e99b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml new file mode 100644 index 00000000..dd09d6ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml new file mode 100644 index 00000000..dfe82a9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml new file mode 100644 index 00000000..7304da38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_management diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml new file mode 100644 index 00000000..2143e4f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_marketing diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml new file mode 100644 index 00000000..01549868 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml new file mode 100644 index 00000000..0c8bd533 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml new file mode 100644 index 00000000..f03361ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml new file mode 100644 index 00000000..a36519a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml new file mode 100644 index 00000000..799065cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml new file mode 100644 index 00000000..a5f0372b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml new file mode 100644 index 00000000..2145e87d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml new file mode 100644 index 00000000..7ad55e97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml new file mode 100644 index 00000000..6f4e338f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml new file mode 100644 index 00000000..7a1214a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml new file mode 100644 index 00000000..a2d49ec8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml new file mode 100644 index 00000000..4b7d23a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml new file mode 100644 index 00000000..a1a3b22e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml new file mode 100644 index 00000000..fefef9d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_sociology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml new file mode 100644 index 00000000..35394ab5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml new file mode 100644 index 00000000..f0f2f595 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_virology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml new file mode 100644 index 00000000..1a43e6fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/de/utils.py b/lm_eval/tasks/global_mmlu/full/de/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml b/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml new file mode 100644 index 00000000..5fccad5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: el +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml new file mode 100644 index 00000000..a77feecb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_el +task: + - global_mmlu_full_el_stem + - global_mmlu_full_el_other + - global_mmlu_full_el_social_sciences + - global_mmlu_full_el_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml new file mode 100644 index 00000000..f07f2b52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_humanities +task: + - global_mmlu_full_el_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml new file mode 100644 index 00000000..938292f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_other +task: + - global_mmlu_full_el_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml new file mode 100644 index 00000000..e72e1e9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_el_social_sciences +task: + - global_mmlu_full_el_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml new file mode 100644 index 00000000..2123be08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_stem +task: + - global_mmlu_full_el_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml new file mode 100644 index 00000000..bc56c069 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml new file mode 100644 index 00000000..0b2e0e7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml new file mode 100644 index 00000000..7faf7389 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml new file mode 100644 index 00000000..0e8b5bb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml new file mode 100644 index 00000000..51ade421 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml new file mode 100644 index 00000000..cf3aa362 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml new file mode 100644 index 00000000..cd8e1dac --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml new file mode 100644 index 00000000..f1ea0859 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml new file mode 100644 index 00000000..0ec055b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml new file mode 100644 index 00000000..b16b545b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml new file mode 100644 index 00000000..a4630f7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml new file mode 100644 index 00000000..a40228ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml new file mode 100644 index 00000000..e7baf6e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml new file mode 100644 index 00000000..48e59021 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml new file mode 100644 index 00000000..294c3c5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml new file mode 100644 index 00000000..4373d82e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml new file mode 100644 index 00000000..81799a17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml new file mode 100644 index 00000000..6317eeec --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml new file mode 100644 index 00000000..fa5958aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml new file mode 100644 index 00000000..38053add --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml new file mode 100644 index 00000000..4fe73214 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml new file mode 100644 index 00000000..8beb1e3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml new file mode 100644 index 00000000..22c08321 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e3b33041 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..63ba6a05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml new file mode 100644 index 00000000..f6ff6e2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml new file mode 100644 index 00000000..5e4deeeb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml new file mode 100644 index 00000000..cb875703 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml new file mode 100644 index 00000000..e82d1b53 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml new file mode 100644 index 00000000..0003184c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml new file mode 100644 index 00000000..f5e0a367 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml new file mode 100644 index 00000000..ac460ea8 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml new file mode 100644 index 00000000..8a40e04f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml new file mode 100644 index 00000000..de5075bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml new file mode 100644 index 00000000..2fb93f2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml new file mode 100644 index 00000000..624e040a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml new file mode 100644 index 00000000..7e5bdb4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml new file mode 100644 index 00000000..180f3b25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml new file mode 100644 index 00000000..40487fb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_management diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml new file mode 100644 index 00000000..781d4170 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_marketing diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml new file mode 100644 index 00000000..2ca01146 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml new file mode 100644 index 00000000..66114367 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml new file mode 100644 index 00000000..c553ab7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml new file mode 100644 index 00000000..14a79a4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml new file mode 100644 index 00000000..595daa39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml new file mode 100644 index 00000000..25b121b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml new file mode 100644 index 00000000..5938a174 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml new file mode 100644 index 00000000..002b02aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml new file mode 100644 index 00000000..7b457038 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml new file mode 100644 index 00000000..a31d4e3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml new file mode 100644 index 00000000..6e048079 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml new file mode 100644 index 00000000..264799d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml new file mode 100644 index 00000000..19ffae47 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml new file mode 100644 index 00000000..f57d3e0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_sociology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml new file mode 100644 index 00000000..14c76440 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml new file mode 100644 index 00000000..0e444358 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_virology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml new file mode 100644 index 00000000..60f8e52e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/el/utils.py b/lm_eval/tasks/global_mmlu/full/el/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml b/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml new file mode 100644 index 00000000..ae7da46b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: en +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml new file mode 100644 index 00000000..648a10dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_en +task: + - global_mmlu_full_en_stem + - global_mmlu_full_en_other + - global_mmlu_full_en_social_sciences + - global_mmlu_full_en_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml new file mode 100644 index 00000000..4455fbcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_humanities +task: + - global_mmlu_full_en_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml new file mode 100644 index 00000000..cca60e52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_other +task: + - global_mmlu_full_en_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml new file mode 100644 index 00000000..becac7a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_en_social_sciences +task: + - global_mmlu_full_en_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml new file mode 100644 index 00000000..71aac061 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_stem +task: + - global_mmlu_full_en_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml new file mode 100644 index 00000000..3d7a5ed8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml new file mode 100644 index 00000000..f2267ad8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml new file mode 100644 index 00000000..6999c30f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml new file mode 100644 index 00000000..56a6e490 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml new file mode 100644 index 00000000..60425fad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml new file mode 100644 index 00000000..9b5f2f8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml new file mode 100644 index 00000000..8e2ab91f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml new file mode 100644 index 00000000..9abf38db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml new file mode 100644 index 00000000..5da6199f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml new file mode 100644 index 00000000..c568f36b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml new file mode 100644 index 00000000..ac044019 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml new file mode 100644 index 00000000..be47dbde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml new file mode 100644 index 00000000..86180924 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml new file mode 100644 index 00000000..a75d329f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml new file mode 100644 index 00000000..2568993f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml new file mode 100644 index 00000000..622a99f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml new file mode 100644 index 00000000..109ca44a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml new file mode 100644 index 00000000..39daa506 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml new file mode 100644 index 00000000..063392eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml new file mode 100644 index 00000000..452e9445 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml new file mode 100644 index 00000000..baf43136 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml new file mode 100644 index 00000000..fceda5c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml new file mode 100644 index 00000000..4fbb9ade --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml new file mode 100644 index 00000000..73ca9087 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..1b9ca7a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml new file mode 100644 index 00000000..9be50ad2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml new file mode 100644 index 00000000..d93285cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml new file mode 100644 index 00000000..2f74c609 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml new file mode 100644 index 00000000..365762ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml new file mode 100644 index 00000000..d6ca42ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml new file mode 100644 index 00000000..4f20a4dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml new file mode 100644 index 00000000..d0fce403 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml new file mode 100644 index 00000000..35320a85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml new file mode 100644 index 00000000..86096c5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml new file mode 100644 index 00000000..8a41e9fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml new file mode 100644 index 00000000..aa34c443 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml new file mode 100644 index 00000000..50c105b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml new file mode 100644 index 00000000..35f496c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml new file mode 100644 index 00000000..d8499d9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_management diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml new file mode 100644 index 00000000..05f8f0ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_marketing diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml new file mode 100644 index 00000000..8f272510 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml new file mode 100644 index 00000000..a72fad22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml new file mode 100644 index 00000000..2504abeb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml new file mode 100644 index 00000000..4ae4c37a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml new file mode 100644 index 00000000..b5364f69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml new file mode 100644 index 00000000..6e68d7e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml new file mode 100644 index 00000000..72e93368 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml new file mode 100644 index 00000000..cdb66ead --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml new file mode 100644 index 00000000..67120278 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml new file mode 100644 index 00000000..ffbcb29b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml new file mode 100644 index 00000000..1abea59b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml new file mode 100644 index 00000000..9df4f491 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml new file mode 100644 index 00000000..addb6934 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml new file mode 100644 index 00000000..a198cb84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_sociology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml new file mode 100644 index 00000000..047b61e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml new file mode 100644 index 00000000..bb74fefd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_virology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml new file mode 100644 index 00000000..2c453bf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/en/utils.py b/lm_eval/tasks/global_mmlu/full/en/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml b/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml new file mode 100644 index 00000000..443af17c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: es +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml new file mode 100644 index 00000000..832001c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_es +task: + - global_mmlu_full_es_stem + - global_mmlu_full_es_other + - global_mmlu_full_es_social_sciences + - global_mmlu_full_es_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml new file mode 100644 index 00000000..bda6944e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_humanities +task: + - global_mmlu_full_es_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml new file mode 100644 index 00000000..610366ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_other +task: + - global_mmlu_full_es_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml new file mode 100644 index 00000000..00948690 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_es_social_sciences +task: + - global_mmlu_full_es_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml new file mode 100644 index 00000000..483a8fd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_stem +task: + - global_mmlu_full_es_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml new file mode 100644 index 00000000..02fb7200 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml new file mode 100644 index 00000000..40f05e7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml new file mode 100644 index 00000000..fb688c13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml new file mode 100644 index 00000000..aab858f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml new file mode 100644 index 00000000..a3483f8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml new file mode 100644 index 00000000..36658ab6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml new file mode 100644 index 00000000..47a47444 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml new file mode 100644 index 00000000..4154324e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml new file mode 100644 index 00000000..85bc6261 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml new file mode 100644 index 00000000..40e8d129 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml new file mode 100644 index 00000000..7ebc5e95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml new file mode 100644 index 00000000..b586eb2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml new file mode 100644 index 00000000..4186cec6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml new file mode 100644 index 00000000..3d61c8f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml new file mode 100644 index 00000000..1a454d79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml new file mode 100644 index 00000000..772436e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml new file mode 100644 index 00000000..da6223fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml new file mode 100644 index 00000000..ae3b5912 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml new file mode 100644 index 00000000..79a72140 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml new file mode 100644 index 00000000..27ba7570 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml new file mode 100644 index 00000000..72ad4505 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml new file mode 100644 index 00000000..2cec9d5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml new file mode 100644 index 00000000..5ee91f71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml new file mode 100644 index 00000000..b3f10319 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..d555129a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml new file mode 100644 index 00000000..a1216336 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml new file mode 100644 index 00000000..d4c28844 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml new file mode 100644 index 00000000..fb83ad1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml new file mode 100644 index 00000000..4bcd53e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml new file mode 100644 index 00000000..900936eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml new file mode 100644 index 00000000..d54acd65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml new file mode 100644 index 00000000..2a654fe8 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml new file mode 100644 index 00000000..47bd8900 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml new file mode 100644 index 00000000..29925c34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml new file mode 100644 index 00000000..abe4ef94 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml new file mode 100644 index 00000000..751878fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml new file mode 100644 index 00000000..55233f7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml new file mode 100644 index 00000000..9a11e310 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml new file mode 100644 index 00000000..a31b4c26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_management diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml new file mode 100644 index 00000000..22136569 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_marketing diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml new file mode 100644 index 00000000..18fc7a23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml new file mode 100644 index 00000000..5b3955a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml new file mode 100644 index 00000000..57095856 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml new file mode 100644 index 00000000..ed31f8cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml new file mode 100644 index 00000000..07746d09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml new file mode 100644 index 00000000..3853e162 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml new file mode 100644 index 00000000..b75ac9df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml new file mode 100644 index 00000000..da8fd46f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml new file mode 100644 index 00000000..ddd0ab3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml new file mode 100644 index 00000000..6be1ae81 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml new file mode 100644 index 00000000..cadc7f96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml new file mode 100644 index 00000000..72609ea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml new file mode 100644 index 00000000..319123c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml new file mode 100644 index 00000000..dec44c29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_sociology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml new file mode 100644 index 00000000..a18a3942 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml new file mode 100644 index 00000000..b06431e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_virology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml new file mode 100644 index 00000000..4d9d6b79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/es/utils.py b/lm_eval/tasks/global_mmlu/full/es/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml b/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml new file mode 100644 index 00000000..952259b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fa +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml new file mode 100644 index 00000000..9edb8540 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fa +task: + - global_mmlu_full_fa_stem + - global_mmlu_full_fa_other + - global_mmlu_full_fa_social_sciences + - global_mmlu_full_fa_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml new file mode 100644 index 00000000..f36ecea5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_humanities +task: + - global_mmlu_full_fa_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml new file mode 100644 index 00000000..dd57bb86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_other +task: + - global_mmlu_full_fa_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml new file mode 100644 index 00000000..9e7da860 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_fa_social_sciences +task: + - global_mmlu_full_fa_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml new file mode 100644 index 00000000..5bf2eb01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_stem +task: + - global_mmlu_full_fa_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml new file mode 100644 index 00000000..1014795f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml new file mode 100644 index 00000000..317705c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml new file mode 100644 index 00000000..45475964 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml new file mode 100644 index 00000000..3c0dd60b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml new file mode 100644 index 00000000..a7af0e21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml new file mode 100644 index 00000000..31ae6d71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml new file mode 100644 index 00000000..8b099f41 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml new file mode 100644 index 00000000..07491e5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml new file mode 100644 index 00000000..774f6b97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml new file mode 100644 index 00000000..13d6f5a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml new file mode 100644 index 00000000..1e415b8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml new file mode 100644 index 00000000..ae47213b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml new file mode 100644 index 00000000..c3f2ba4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml new file mode 100644 index 00000000..6cf79a92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml new file mode 100644 index 00000000..ab7aa858 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml new file mode 100644 index 00000000..b83f6ddc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml new file mode 100644 index 00000000..cab2effa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml new file mode 100644 index 00000000..93d11b75 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml new file mode 100644 index 00000000..59b6869b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml new file mode 100644 index 00000000..8d15d4b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml new file mode 100644 index 00000000..a02df4f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml new file mode 100644 index 00000000..e18b2c7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml new file mode 100644 index 00000000..d94c7e89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e1007895 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..a9ad0633 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml new file mode 100644 index 00000000..2c733b17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml new file mode 100644 index 00000000..4f88f0aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml new file mode 100644 index 00000000..64fdef98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml new file mode 100644 index 00000000..c43a115b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml new file mode 100644 index 00000000..ebb4e82d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml new file mode 100644 index 00000000..a0041e33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml new file mode 100644 index 00000000..66f38f54 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml new file mode 100644 index 00000000..95a2adde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml new file mode 100644 index 00000000..475a71fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml new file mode 100644 index 00000000..c0d6aec2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml new file mode 100644 index 00000000..0d82bd5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml new file mode 100644 index 00000000..a8e89d3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml new file mode 100644 index 00000000..4e4d1a8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml new file mode 100644 index 00000000..e7e592ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_management diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml new file mode 100644 index 00000000..c0e7ef1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_marketing diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml new file mode 100644 index 00000000..c31679ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml new file mode 100644 index 00000000..652d5a33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml new file mode 100644 index 00000000..16adcb26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml new file mode 100644 index 00000000..92d018f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml new file mode 100644 index 00000000..ae7e065e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml new file mode 100644 index 00000000..cd8513da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml new file mode 100644 index 00000000..9fd6bb3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml new file mode 100644 index 00000000..99f6c316 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml new file mode 100644 index 00000000..9fee460a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml new file mode 100644 index 00000000..13d67d45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml new file mode 100644 index 00000000..3e821145 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml new file mode 100644 index 00000000..de6cc311 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml new file mode 100644 index 00000000..64d5fd14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml new file mode 100644 index 00000000..cf3d9564 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_sociology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml new file mode 100644 index 00000000..38d51936 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml new file mode 100644 index 00000000..39c5188d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_virology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml new file mode 100644 index 00000000..44e6fc82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fa/utils.py b/lm_eval/tasks/global_mmlu/full/fa/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml b/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml new file mode 100644 index 00000000..32dc097a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fil +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. 
{{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml new file mode 100644 index 00000000..24fcb6d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fil +task: + - global_mmlu_full_fil_stem + - global_mmlu_full_fil_other + - global_mmlu_full_fil_social_sciences + - global_mmlu_full_fil_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml new file mode 100644 index 00000000..061eb818 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_humanities +task: + - global_mmlu_full_fil_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml new file mode 100644 index 00000000..fea793ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_other +task: + - global_mmlu_full_fil_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml new file mode 100644 index 00000000..e9f79330 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_social_sciences +task: + - global_mmlu_full_fil_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml new file mode 100644 index 00000000..2e567c70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_stem +task: + - global_mmlu_full_fil_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml new file mode 100644 index 00000000..7eef19d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml new file mode 100644 index 00000000..e87d8d80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml new file mode 100644 index 00000000..6c258877 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml new file mode 100644 index 00000000..139f3ccc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml new file mode 100644 index 00000000..fc160a99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml new file mode 100644 index 00000000..ff6fa3d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml new file mode 100644 index 00000000..61f0df50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml new file mode 100644 index 00000000..1385b934 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml new file mode 100644 index 00000000..afe15d7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml new file mode 100644 index 00000000..221289f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function 
utils.process_college_medicine +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml new file mode 100644 index 00000000..863792b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml new file mode 100644 index 00000000..7971c606 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml new file mode 100644 index 00000000..77a75ccf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml new file mode 100644 index 00000000..bd98fc8d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml new file mode 100644 index 00000000..98e48a27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml new file mode 100644 index 00000000..eba4149c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml new file mode 100644 index 00000000..f1796059 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_formal_logic diff --git 
a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml new file mode 100644 index 00000000..96886181 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml new file mode 100644 index 00000000..93d94120 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml new file mode 100644 index 00000000..9ec56d5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml new file mode 100644 index 00000000..82d86aed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml new file mode 100644 index 00000000..7fcdec0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml new file mode 100644 index 00000000..96268192 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml new file mode 100644 index 00000000..f826de3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_fil_social_sciences_tasks +task: 
global_mmlu_full_fil_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml new file mode 100644 index 00000000..104a7088 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml new file mode 100644 index 00000000..1d499b4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml new file mode 100644 index 00000000..43fcc04d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml 
new file mode 100644 index 00000000..175f31ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml new file mode 100644 index 00000000..2fc2dd5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml new file mode 100644 index 00000000..0540d57c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml new file mode 100644 index 00000000..d0801af2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: 
global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml new file mode 100644 index 00000000..724b7ce8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml new file mode 100644 index 00000000..6c2c1141 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml new file mode 100644 index 00000000..1672d5b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml new file mode 100644 index 00000000..4c5da91c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_international_law diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml new file mode 100644 index 00000000..dea2b20b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml new file mode 100644 index 00000000..6a30c724 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml new file mode 100644 index 00000000..d2a7062c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_machine_learning diff --git 
a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml new file mode 100644 index 00000000..1ea56835 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_management diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml new file mode 100644 index 00000000..82d4490a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_marketing diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml new file mode 100644 index 00000000..bdeb0984 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml new file mode 100644 index 00000000..51c56a3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_miscellaneous +tag: 
global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml new file mode 100644 index 00000000..53148a54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml new file mode 100644 index 00000000..fb5fecf2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml new file mode 100644 index 00000000..35859dc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml new file mode 100644 index 00000000..dc2d414e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _fil_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml new file mode 100644 index 00000000..abf65fd2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml new file mode 100644 index 00000000..04ce3436 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml new file mode 100644 index 00000000..e5694cf4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml new file mode 100644 index 
00000000..e7fd0446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml new file mode 100644 index 00000000..b9ce14aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml new file mode 100644 index 00000000..fdae5298 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml new file mode 100644 index 00000000..4a03eec1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fil_social_sciences_tasks +task: 
global_mmlu_full_fil_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml new file mode 100644 index 00000000..bc0ed052 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_sociology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml new file mode 100644 index 00000000..ed40afb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml new file mode 100644 index 00000000..85ed4d42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_virology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml new file mode 100644 index 00000000..3ee6bce1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml 
+process_docs: !function utils.process_world_religions +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fil/utils.py b/lm_eval/tasks/global_mmlu/full/fil/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + 
+globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml b/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml new file mode 100644 index 00000000..47ca7972 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml new file mode 100644 index 00000000..e85d6746 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fr +task: + - global_mmlu_full_fr_stem + - global_mmlu_full_fr_other + - global_mmlu_full_fr_social_sciences + - global_mmlu_full_fr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml new file mode 100644 index 00000000..697e3a29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_humanities +task: + - global_mmlu_full_fr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml new file mode 100644 index 00000000..9b2ada6b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_other +task: + - global_mmlu_full_fr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml new file mode 100644 index 00000000..ac7e4605 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_social_sciences +task: + - global_mmlu_full_fr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml new file mode 100644 index 00000000..c81d601f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_stem +task: + - global_mmlu_full_fr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml new file mode 100644 index 00000000..bf7d76c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml new file mode 100644 index 00000000..e9a96927 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml new file mode 100644 index 00000000..6e4ca5a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml new file mode 100644 index 00000000..df3c1fbd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml new file mode 100644 index 00000000..b0daa2e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml new file 
mode 100644 index 00000000..1e997578 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml new file mode 100644 index 00000000..9c1c3189 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml new file mode 100644 index 00000000..078108f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml new file mode 100644 index 00000000..bf2f2940 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml new file mode 100644 index 00000000..8c9ccc80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml new file mode 100644 index 00000000..01dcea37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml new file mode 100644 index 00000000..794f64be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml new file mode 100644 index 00000000..12c6afc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml new file mode 100644 index 00000000..1f33ddab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml new file mode 100644 index 00000000..8dcb0585 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml new file mode 100644 index 00000000..2658ce96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml new file mode 100644 index 00000000..5239cb1c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml new file mode 100644 index 00000000..2763dcb5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml new file mode 100644 index 00000000..2a6a26c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml new file mode 100644 index 00000000..6ffacc29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml new file mode 100644 index 00000000..d1720422 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml new file mode 100644 index 00000000..9788e7be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml new file mode 100644 index 00000000..3e2ff22e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2aba3b61 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml new file mode 100644 index 00000000..21fb1df5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml new file mode 100644 index 00000000..a975d1fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml new file mode 100644 index 00000000..ff654ff3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml new file mode 100644 index 00000000..4038c956 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml new file mode 100644 index 00000000..a65da780 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml new file mode 100644 index 00000000..37c75136 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml new file mode 100644 index 00000000..a0e123f2 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml new file mode 100644 index 00000000..e2a9cf6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml new file mode 100644 index 00000000..b9e9ece9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml new file mode 100644 index 00000000..eac30d27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml new file mode 100644 index 00000000..2e15b0fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml new file mode 100644 index 00000000..f42079c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml new file mode 100644 index 00000000..68ebdb71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml new file mode 100644 index 00000000..25a8df3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml new file mode 100644 index 00000000..73f7d869 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_management diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml new file mode 100644 index 00000000..8a19b83e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml new file mode 100644 index 00000000..d15774f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml new file mode 100644 index 00000000..d7519709 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_fr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml new file mode 100644 index 00000000..f625921e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml new file mode 100644 index 00000000..4575ca04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml new file mode 100644 index 00000000..61521c91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml new file mode 100644 index 00000000..7f771c34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml new file mode 100644 index 00000000..3bcac0f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml new file mode 100644 index 00000000..a06a7af5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml new file mode 100644 index 00000000..2ecf2e8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml new file mode 100644 
index 00000000..983a2d38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml new file mode 100644 index 00000000..59d9aa30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml new file mode 100644 index 00000000..d84b7ad0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml new file mode 100644 index 00000000..fcd82b7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fr_social_sciences_tasks +task: 
global_mmlu_full_fr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml new file mode 100644 index 00000000..ff7b8fd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_sociology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml new file mode 100644 index 00000000..d92c2095 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml new file mode 100644 index 00000000..211c96a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_virology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml new file mode 100644 index 00000000..f1f168ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fr/utils.py b/lm_eval/tasks/global_mmlu/full/fr/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml new file mode 100644 index 00000000..08a958bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ha +task: + - global_mmlu_full_ha_stem + - global_mmlu_full_ha_other + - global_mmlu_full_ha_social_sciences + - global_mmlu_full_ha_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml new file mode 100644 index 00000000..84cce38d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_humanities +task: + - global_mmlu_full_ha_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml new file mode 100644 index 00000000..73a6ea0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_other +task: + - global_mmlu_full_ha_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml new file mode 100644 index 00000000..8b520a5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_social_sciences +task: + - global_mmlu_full_ha_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml new file mode 100644 index 00000000..6213d280 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_stem +task: + - global_mmlu_full_ha_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml b/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml new file mode 100644 index 00000000..8521fe50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ha +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml new file mode 100644 index 00000000..62ad5e7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml new file mode 100644 index 00000000..2ead0f6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ha_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml new file mode 100644 index 00000000..1616398f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml new file mode 100644 index 00000000..c1719b0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml new file mode 100644 index 00000000..dcef5e27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml new file mode 100644 index 00000000..f2825694 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml new file mode 100644 index 00000000..73a422e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml new file mode 100644 index 00000000..7bdb65c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml new file mode 100644 index 00000000..47e5326c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml new file mode 100644 index 00000000..9065f085 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml new file mode 100644 index 00000000..40aa11c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml new file mode 100644 index 00000000..38d1e9c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml new file mode 100644 index 00000000..7326514a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml new file mode 100644 index 00000000..e865b6bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml new file mode 100644 index 00000000..9457d1bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml new file mode 100644 index 00000000..e04fb1ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml new file mode 100644 index 00000000..03c9cbac --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml new file mode 100644 index 00000000..db104be5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml new file mode 100644 index 00000000..729fed2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml new file mode 100644 index 00000000..13f5621b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml new file mode 100644 index 00000000..1914e1fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml new file mode 100644 index 00000000..fa878b03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml new file mode 100644 index 00000000..10a13674 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml new file mode 100644 index 00000000..eebac409 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml new file mode 100644 index 00000000..0a22ab84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml new file mode 100644 index 00000000..fc681f90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml new file mode 100644 index 00000000..81bb343c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml new file mode 100644 index 00000000..2bc4cc4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml new file mode 100644 index 00000000..c5d46e5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml new file mode 100644 index 00000000..4848cc31 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml new file mode 100644 index 00000000..7a22c79a --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml new file mode 100644 index 00000000..13882279 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml new file mode 100644 index 00000000..51ff436b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml new file mode 100644 index 00000000..1a36fb86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml new file mode 100644 index 00000000..f1c9cc1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml new file mode 100644 index 00000000..4bc1314b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml new file mode 100644 index 00000000..259534b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml new file mode 100644 index 00000000..c94a073b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml new file mode 100644 index 00000000..666d4720 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_management diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml new file mode 100644 index 00000000..9528a1f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml new file mode 100644 index 00000000..92f0a408 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml new file mode 100644 index 00000000..fc97a8dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ha_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml new file mode 100644 index 00000000..dbcf96c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml new file mode 100644 index 00000000..aa7b4266 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml new file mode 100644 index 00000000..b413e4be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml new file mode 100644 index 00000000..118e4801 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml new file mode 100644 index 00000000..a310d023 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml new file mode 100644 index 00000000..79536ddc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml new file mode 100644 index 00000000..613170da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml new file mode 100644 
index 00000000..bd65c233 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml new file mode 100644 index 00000000..cf7ecb1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml new file mode 100644 index 00000000..c9cba53f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml new file mode 100644 index 00000000..fe767686 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ha_social_sciences_tasks +task: 
global_mmlu_full_ha_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml new file mode 100644 index 00000000..94f8e311 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml new file mode 100644 index 00000000..54f82b3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml new file mode 100644 index 00000000..ce7d224d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_virology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml new file mode 100644 index 00000000..67a6d33d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ha/utils.py b/lm_eval/tasks/global_mmlu/full/ha/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml new file mode 100644 index 00000000..ff0a5e8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_he +task: + - global_mmlu_full_he_stem + - global_mmlu_full_he_other + - global_mmlu_full_he_social_sciences + - global_mmlu_full_he_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml new file mode 100644 index 00000000..678ee0d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_humanities +task: + - global_mmlu_full_he_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml new file mode 100644 index 00000000..c99b4806 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_other +task: + - global_mmlu_full_he_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml new file mode 100644 index 00000000..12906895 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_social_sciences +task: + - global_mmlu_full_he_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml new file mode 100644 index 00000000..f6e76e7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_stem +task: + - global_mmlu_full_he_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml b/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml new file mode 100644 index 00000000..b6ec9fc8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: he +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml new file mode 100644 index 00000000..fb197c3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml new file mode 100644 index 00000000..3ab9ee20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _he_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml new file mode 100644 index 00000000..8950b1e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml new file mode 100644 index 00000000..8c114348 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml new file mode 100644 index 00000000..1324a04d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml new file mode 100644 index 00000000..cecddc60 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml new file mode 100644 index 00000000..2c0f8b5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml new file mode 100644 index 00000000..b4c36a41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml new file mode 100644 index 00000000..3633d537 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml new file mode 100644 index 00000000..a28c592e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml new file mode 100644 index 00000000..3893b9aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml new file mode 100644 index 00000000..4167874e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml new file mode 100644 index 00000000..2ee92851 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml new file mode 100644 index 00000000..9ceb3277 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml new file mode 100644 index 00000000..00658e28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml new file mode 100644 index 00000000..10f2ac18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml new file mode 100644 index 00000000..aac3f8da --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml new file mode 100644 index 00000000..299a73ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml new file mode 100644 index 00000000..9d3ba893 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml new file mode 100644 index 00000000..f67f8ef3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml new file mode 100644 index 00000000..7ca8b6f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml new file mode 100644 index 00000000..58e4081a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml new file mode 100644 index 00000000..2d76e387 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e3745110 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fa0b7c71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml new file mode 100644 index 00000000..7f78a5c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml new file mode 100644 index 00000000..15be9243 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml new file mode 100644 index 00000000..6f309c0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml new file mode 100644 index 00000000..1ae831c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml new file mode 100644 index 00000000..3a2e8170 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml new file mode 100644 index 00000000..c05da45a --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml new file mode 100644 index 00000000..b818e4fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml new file mode 100644 index 00000000..49f7ce5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml new file mode 100644 index 00000000..91d08567 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml new file mode 100644 index 00000000..1bedb4f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_international_law diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml new file mode 100644 index 00000000..39fe15a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml new file mode 100644 index 00000000..e54b58b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml new file mode 100644 index 00000000..8190e96a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml new file mode 100644 index 00000000..d5811f80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_management diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml new file mode 100644 index 00000000..7fe44232 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_marketing diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml new file mode 100644 index 00000000..8c9082c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml new file mode 100644 index 00000000..bc419dee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_he_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml new file mode 100644 index 00000000..d889642b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml new file mode 100644 index 00000000..11554823 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml new file mode 100644 index 00000000..30d49701 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml new file mode 100644 index 00000000..458632de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml new file mode 100644 index 00000000..93835673 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml new file mode 100644 index 00000000..aed28636 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml new file mode 100644 index 00000000..38a9e3cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml new file mode 100644 
index 00000000..e8ca950c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml new file mode 100644 index 00000000..f82c2892 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml new file mode 100644 index 00000000..e3aff661 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml new file mode 100644 index 00000000..e99aa015 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_he_social_sciences_tasks +task: 
global_mmlu_full_he_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml new file mode 100644 index 00000000..de81b92c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_sociology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml new file mode 100644 index 00000000..7be65044 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml new file mode 100644 index 00000000..b6f51e1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_virology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml new file mode 100644 index 00000000..e3d10a0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/he/utils.py b/lm_eval/tasks/global_mmlu/full/he/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml new file mode 100644 index 00000000..ed54a6ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_hi +task: + - global_mmlu_full_hi_stem + - global_mmlu_full_hi_other + - global_mmlu_full_hi_social_sciences + - global_mmlu_full_hi_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml new file mode 100644 index 00000000..36492fa3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_humanities +task: + - global_mmlu_full_hi_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml new file mode 100644 index 00000000..08dc16b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_other +task: + - global_mmlu_full_hi_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml new file mode 100644 index 00000000..0a4dfdd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_social_sciences +task: + - global_mmlu_full_hi_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml new file mode 100644 index 00000000..7a0123ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_stem +task: + - global_mmlu_full_hi_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml b/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml new file mode 100644 index 00000000..18c6286e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: hi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml new file mode 100644 index 00000000..f239f067 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml new file mode 100644 index 00000000..dfcd776e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _hi_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml new file mode 100644 index 00000000..dbb6763d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml new file mode 100644 index 00000000..5882427e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml new file mode 100644 index 00000000..7b0c6c3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml new file mode 100644 index 00000000..d5326c8d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml new file mode 100644 index 00000000..bf9e2130 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml new file mode 100644 index 00000000..c79f4250 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml new file mode 100644 index 00000000..4e8b0427 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml new file mode 100644 index 00000000..7e8c0df2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml new file mode 100644 index 00000000..5fe337ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml new file mode 100644 index 00000000..029a02e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml new file mode 100644 index 00000000..a6748974 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml new file mode 100644 index 00000000..355053b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml new file mode 100644 index 00000000..04dca10d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml new file mode 100644 index 00000000..ca7a3083 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml new file mode 100644 index 00000000..ae534fa6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml new file mode 100644 index 00000000..096fd58b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml new file mode 100644 index 00000000..9ef04ee5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml new file mode 100644 index 00000000..4e8913e5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml new file mode 100644 index 00000000..180eef75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml new file mode 100644 index 00000000..32abd63b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml new file mode 100644 index 00000000..1089908b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml new file mode 100644 index 00000000..fb22bb51 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml new file mode 100644 index 00000000..affc27c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml new file mode 100644 index 00000000..59f97c94 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml new file mode 100644 index 00000000..a7506a4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml new file mode 100644 index 00000000..406035bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml new file mode 100644 index 00000000..f5c2be37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml new file mode 100644 index 00000000..a955febe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml new file mode 100644 index 00000000..6a5573f8 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml new file mode 100644 index 00000000..38ce4680 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml new file mode 100644 index 00000000..2486301f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml new file mode 100644 index 00000000..8f889885 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml new file mode 100644 index 00000000..2cb0d834 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_international_law diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml new file mode 100644 index 00000000..11329130 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml new file mode 100644 index 00000000..e22cedbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml new file mode 100644 index 00000000..134ab080 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml new file mode 100644 index 00000000..e523b5d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_management diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml new file mode 100644 index 00000000..11d8930b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_marketing diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml new file mode 100644 index 00000000..ad38e3e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml new file mode 100644 index 00000000..c31f8883 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_hi_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml new file mode 100644 index 00000000..01145f6f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml new file mode 100644 index 00000000..4acbb127 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml new file mode 100644 index 00000000..889c0018 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml new file mode 100644 index 00000000..2a8aeb4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml new file mode 100644 index 00000000..ad80a3c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml new file mode 100644 index 00000000..1f547789 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml new file mode 100644 index 00000000..836d577d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml new file mode 100644 
index 00000000..7a8e7db9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml new file mode 100644 index 00000000..b4ebc1a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml new file mode 100644 index 00000000..7bbf959c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml new file mode 100644 index 00000000..7faa9d43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_hi_social_sciences_tasks +task: 
global_mmlu_full_hi_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml new file mode 100644 index 00000000..b0ca49ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_sociology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml new file mode 100644 index 00000000..d5fd9f0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml new file mode 100644 index 00000000..843ea254 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_virology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml new file mode 100644 index 00000000..f5e56ce0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/hi/utils.py b/lm_eval/tasks/global_mmlu/full/hi/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml new file mode 100644 index 00000000..f678660e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_id +task: + - global_mmlu_full_id_stem + - global_mmlu_full_id_other + - global_mmlu_full_id_social_sciences + - global_mmlu_full_id_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml new file mode 100644 index 00000000..b9283f55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_humanities +task: + - global_mmlu_full_id_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml new file mode 100644 index 00000000..74de0f36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_other +task: + - global_mmlu_full_id_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml new file mode 100644 index 00000000..b8656b6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_social_sciences +task: + - global_mmlu_full_id_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml new file mode 100644 index 00000000..d0e47276 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_stem +task: + - global_mmlu_full_id_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml b/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml new file mode 100644 index 00000000..32d9dc92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: id +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml new file mode 100644 index 00000000..b18c1cd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml new file mode 100644 index 00000000..65b83d9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _id_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml new file mode 100644 index 00000000..11f1047c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml new file mode 100644 index 00000000..9ed992f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml new file mode 100644 index 00000000..8baa424f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml new file mode 100644 index 00000000..67b9c935 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml new file mode 100644 index 00000000..3eb5d228 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml new file mode 100644 index 00000000..1462945b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml new file mode 100644 index 00000000..98062792 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml new file mode 100644 index 00000000..1a2736e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml new file mode 100644 index 00000000..bb88c3f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml new file mode 100644 index 00000000..9764ac3e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml new file mode 100644 index 00000000..c70c111c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml new file mode 100644 index 00000000..7f82a74b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml new file mode 100644 index 00000000..3cc2dfba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml new file mode 100644 index 00000000..1d511b4b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml new file mode 100644 index 00000000..7c6cef13 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml new file mode 100644 index 00000000..5e7a44da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml new file mode 100644 index 00000000..d39c31ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml new file mode 100644 index 00000000..d92d827a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml new file mode 100644 index 00000000..ff714ac8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml new file mode 100644 index 00000000..0d5c8141 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml new file mode 100644 index 00000000..1ad392b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml new file mode 100644 index 00000000..850d6d82 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml new file mode 100644 index 00000000..c1fda5c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml new file mode 100644 index 00000000..8a628ed9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml new file mode 100644 index 00000000..f2c44707 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml new file mode 100644 index 00000000..75888a3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml new file mode 100644 index 00000000..8a6ff54b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml new file mode 100644 index 00000000..ab205802 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml new file mode 100644 index 00000000..8dee8c31 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml new file mode 100644 index 00000000..5474c8ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml new file mode 100644 index 00000000..464ac67f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml new file mode 100644 index 00000000..518cb30c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml new file mode 100644 index 00000000..90262ada --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_international_law diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml new file mode 100644 index 00000000..8727ab49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml new file mode 100644 index 00000000..da2c8e6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml new file mode 100644 index 00000000..84a30d9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml new file mode 100644 index 00000000..fdd340bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_management diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml new file mode 100644 index 00000000..caf3eb0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_marketing diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml new file mode 100644 index 00000000..0d649fd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml new file mode 100644 index 00000000..0811f1b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_id_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml new file mode 100644 index 00000000..0a124ded --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml new file mode 100644 index 00000000..65dfaea7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml new file mode 100644 index 00000000..804ffc60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml new file mode 100644 index 00000000..88b37de8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml new file mode 100644 index 00000000..1e851c49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml new file mode 100644 index 00000000..d45c9517 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml new file mode 100644 index 00000000..965cbad6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml new file mode 100644 
index 00000000..fdd02d53 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml new file mode 100644 index 00000000..b8d294b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml new file mode 100644 index 00000000..8f772b0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml new file mode 100644 index 00000000..1a73d36b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_id_social_sciences_tasks +task: 
global_mmlu_full_id_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml new file mode 100644 index 00000000..715e5c31 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_sociology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml new file mode 100644 index 00000000..59e147ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml new file mode 100644 index 00000000..50225ab5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_virology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml new file mode 100644 index 00000000..0193d12d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/id/utils.py b/lm_eval/tasks/global_mmlu/full/id/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml new file mode 100644 index 00000000..a263e295 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ig +task: + - global_mmlu_full_ig_stem + - global_mmlu_full_ig_other + - global_mmlu_full_ig_social_sciences + - global_mmlu_full_ig_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml new file mode 100644 index 00000000..6c6ffb61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_humanities +task: + - global_mmlu_full_ig_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml new file mode 100644 index 00000000..214efed2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_other +task: + - global_mmlu_full_ig_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml new file mode 100644 index 00000000..e27fe1fa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_social_sciences +task: + - global_mmlu_full_ig_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml new file mode 100644 index 00000000..5dd33b62 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_stem +task: + - global_mmlu_full_ig_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml b/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml new file mode 100644 index 00000000..0832c633 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ig +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml new file mode 100644 index 00000000..1dbf6c83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml new file mode 100644 index 00000000..8dc198c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ig_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml new file mode 100644 index 00000000..078069eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml new file mode 100644 index 00000000..f075e740 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml new file mode 100644 index 00000000..d41779ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml new file mode 100644 index 00000000..5f0e5705 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml new file mode 100644 index 00000000..78e25dc8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml new file mode 100644 index 00000000..d9894a45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml new file mode 100644 index 00000000..8976041f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml new file mode 100644 index 00000000..5edaf0d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml new file mode 100644 index 00000000..e55c01cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml new file mode 100644 index 00000000..5ee7564c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml new file mode 100644 index 00000000..555d4fa8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml new file mode 100644 index 00000000..783804b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml new file mode 100644 index 00000000..789f95d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml new file mode 100644 index 00000000..7a5c9d2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml new file mode 100644 index 00000000..8f9e426c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml new file mode 100644 index 00000000..d9b7955c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml new file mode 100644 index 00000000..368bc71d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml new file mode 100644 index 00000000..1ce77e10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml new file mode 100644 index 00000000..d859f390 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml new file mode 100644 index 00000000..29a93f46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml new file mode 100644 index 00000000..74194a44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml new file mode 100644 index 00000000..cd53504d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml new file mode 100644 index 00000000..30244a64 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml new file mode 100644 index 00000000..737c0a56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml new file mode 100644 index 00000000..c5a2220c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml new file mode 100644 index 00000000..a7d4c537 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml new file mode 100644 index 00000000..d3051f01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml new file mode 100644 index 00000000..d4841032 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml new file mode 100644 index 00000000..61e124fc --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml new file mode 100644 index 00000000..3d83a63d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml new file mode 100644 index 00000000..787e3151 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml new file mode 100644 index 00000000..5c618459 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml new file mode 100644 index 00000000..3a8511d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml new file mode 100644 index 00000000..46254ea1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml new file mode 100644 index 00000000..2bce7502 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml new file mode 100644 index 00000000..93c87fbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml new file mode 100644 index 00000000..780e1c89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_management diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml new file mode 100644 index 00000000..2d30ece9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml new file mode 100644 index 00000000..cac197c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml new file mode 100644 index 00000000..a3824510 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ig_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml new file mode 100644 index 00000000..cc545d84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml new file mode 100644 index 00000000..60ad22fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml new file mode 100644 index 00000000..3cc55607 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml new file mode 100644 index 00000000..3f655632 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml new file mode 100644 index 00000000..db4affcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml new file mode 100644 index 00000000..18d35773 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml new file mode 100644 index 00000000..e9db41d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml new file mode 100644 
index 00000000..7fa28b60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml new file mode 100644 index 00000000..639be381 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml new file mode 100644 index 00000000..d31af09f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml new file mode 100644 index 00000000..200db46b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ig_social_sciences_tasks +task: 
global_mmlu_full_ig_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml new file mode 100644 index 00000000..65a3e4e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml new file mode 100644 index 00000000..ff0b0505 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml new file mode 100644 index 00000000..b437c82f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_virology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml new file mode 100644 index 00000000..6fbc7cfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ig/utils.py b/lm_eval/tasks/global_mmlu/full/ig/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml new file mode 100644 index 00000000..dabb8987 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_it +task: + - global_mmlu_full_it_stem + - global_mmlu_full_it_other + - global_mmlu_full_it_social_sciences + - global_mmlu_full_it_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml new file mode 100644 index 00000000..3d072ccc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_humanities +task: + - global_mmlu_full_it_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml new file mode 100644 index 00000000..99fe18cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_other +task: + - global_mmlu_full_it_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml new file mode 100644 index 00000000..15a457a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_social_sciences +task: + - global_mmlu_full_it_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml new file mode 100644 index 00000000..cf7a555d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_stem +task: + - global_mmlu_full_it_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml b/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml new file mode 100644 index 00000000..4798e10a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: it +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml new file mode 100644 index 00000000..f7351c1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml new file mode 100644 index 00000000..436cd3f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _it_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml new file mode 100644 index 00000000..f98f0f20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml new file mode 100644 index 00000000..d9d931fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml new file mode 100644 index 00000000..fe429024 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml new file mode 100644 index 00000000..71b8f45e --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml new file mode 100644 index 00000000..d29bd758 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml new file mode 100644 index 00000000..f740d259 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml new file mode 100644 index 00000000..7568fb7e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml new file mode 100644 index 00000000..9bfc5ac1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml new file mode 100644 index 00000000..2101847e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml new file mode 100644 index 00000000..70b31f9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml new file mode 100644 index 00000000..d8917d40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml new file mode 100644 index 00000000..a49352fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml new file mode 100644 index 00000000..27f0c6c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml new file mode 100644 index 00000000..fd78a52e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml new file mode 100644 index 00000000..8171fcf1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml new file mode 100644 index 00000000..a952ed44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml new file mode 100644 index 00000000..939ba752 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml new file mode 100644 index 00000000..4524d4dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml new file mode 100644 index 00000000..2dfb1649 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml new file mode 100644 index 00000000..556aaf20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml new file mode 100644 index 00000000..3c1d5b60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml new file mode 100644 index 00000000..a35b6bac --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml new file mode 100644 index 00000000..74c01ccd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml new file mode 100644 index 00000000..6bec02c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml new file mode 100644 index 00000000..551a0f8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml new file mode 100644 index 00000000..3cf7144b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml new file mode 100644 index 00000000..17088e51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml new file mode 100644 index 00000000..f3f35f99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml new file mode 100644 index 00000000..af222877 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml new file mode 100644 index 00000000..698ddb5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml new file mode 100644 index 00000000..6ff49730 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml new file mode 100644 index 00000000..58d32fa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml new file mode 100644 index 00000000..d7c47e55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_international_law diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml new file mode 100644 index 00000000..e100c0e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml new file mode 100644 index 00000000..a07444a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml new file mode 100644 index 00000000..bfd3b7a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml new file mode 100644 index 00000000..5b5feeac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_management diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml new file mode 100644 index 00000000..d50b46f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_marketing diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml new file mode 100644 index 00000000..1b02316c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml new file mode 100644 index 00000000..b638b50d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_it_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml new file mode 100644 index 00000000..520a8bea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml new file mode 100644 index 00000000..abfc7395 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml new file mode 100644 index 00000000..cac74152 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml new file mode 100644 index 00000000..a1d94976 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml new file mode 100644 index 00000000..74bdec82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml new file mode 100644 index 00000000..acf999a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml new file mode 100644 index 00000000..1ec4f58e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml new file mode 100644 
index 00000000..8b53cdac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml new file mode 100644 index 00000000..9b7a24f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml new file mode 100644 index 00000000..727cf4b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml new file mode 100644 index 00000000..90fd186c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_it_social_sciences_tasks +task: 
global_mmlu_full_it_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml new file mode 100644 index 00000000..dbc77935 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_sociology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml new file mode 100644 index 00000000..e2a923b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml new file mode 100644 index 00000000..72758a56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_virology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml new file mode 100644 index 00000000..e4491c51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/it/utils.py b/lm_eval/tasks/global_mmlu/full/it/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml new file mode 100644 index 00000000..103460d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ja +task: + - global_mmlu_full_ja_stem + - global_mmlu_full_ja_other + - global_mmlu_full_ja_social_sciences + - global_mmlu_full_ja_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml new file mode 100644 index 00000000..a063eb0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_humanities +task: + - global_mmlu_full_ja_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml new file mode 100644 index 00000000..1f9b95ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_other +task: + - global_mmlu_full_ja_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml new file mode 100644 index 00000000..4207fea4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_social_sciences +task: + - global_mmlu_full_ja_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml new file mode 100644 index 00000000..7ca6ed1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_stem +task: + - global_mmlu_full_ja_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml b/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml new file mode 100644 index 00000000..591725e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ja +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml new file mode 100644 index 00000000..4b65a75b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml new file mode 100644 index 00000000..e735aa34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ja_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml new file mode 100644 index 00000000..a0ba8947 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml new file mode 100644 index 00000000..c39d286c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml new file mode 100644 index 00000000..27d09b88 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml new file mode 100644 index 00000000..15e26a51 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml new file mode 100644 index 00000000..52c92423 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml new file mode 100644 index 00000000..a91a7d61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml new file mode 100644 index 00000000..67dcd2a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml new file mode 100644 index 00000000..c55ab2a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml new file mode 100644 index 00000000..5413c86d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml new file mode 100644 index 00000000..276f214e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml new file mode 100644 index 00000000..f823ac44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml new file mode 100644 index 00000000..dbc6846a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml new file mode 100644 index 00000000..ba729575 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml new file mode 100644 index 00000000..13807104 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml new file mode 100644 index 00000000..d88d5685 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml new file mode 100644 index 00000000..64cb2b9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml new file mode 100644 index 00000000..b88adf90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml new file mode 100644 index 00000000..eef67cc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml new file mode 100644 index 00000000..c90e5fb7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml new file mode 100644 index 00000000..8318099a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml new file mode 100644 index 00000000..4ed5a620 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml new file mode 100644 index 00000000..6ec0ab84 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml new file mode 100644 index 00000000..4bdd9555 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml new file mode 100644 index 00000000..3abfd81b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml new file mode 100644 index 00000000..483161a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml new file mode 100644 index 00000000..702092af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml new file mode 100644 index 00000000..1b7ce92e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml new file mode 100644 index 00000000..c68acb8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml new file mode 100644 index 00000000..b1b91833 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml new file mode 100644 index 00000000..2dee1f89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml new file mode 100644 index 00000000..3612a7ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml new file mode 100644 index 00000000..b70204fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml new file mode 100644 index 00000000..77ed3c97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml new file mode 100644 index 00000000..f8fbb261 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml new file mode 100644 index 00000000..58d4afcc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml new file mode 100644 index 00000000..e664390f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml new file mode 100644 index 00000000..cf495ae2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_management diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml new file mode 100644 index 00000000..1349771e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml new file mode 100644 index 00000000..1b513ac4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml new file mode 100644 index 00000000..81659bf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ja_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml new file mode 100644 index 00000000..2e77694b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml new file mode 100644 index 00000000..f322376d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml new file mode 100644 index 00000000..1d58fb0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml new file mode 100644 index 00000000..23865361 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml new file mode 100644 index 00000000..a044bf99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml new file mode 100644 index 00000000..b828e0e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml new file mode 100644 index 00000000..7aafb6c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml new file mode 100644 
index 00000000..b0cf9905 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml new file mode 100644 index 00000000..e5ef36c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml new file mode 100644 index 00000000..565439e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml new file mode 100644 index 00000000..f7d21bd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ja_social_sciences_tasks +task: 
global_mmlu_full_ja_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml new file mode 100644 index 00000000..5cc44c1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml new file mode 100644 index 00000000..8ebdb14a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml new file mode 100644 index 00000000..d6f83367 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_virology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml new file mode 100644 index 00000000..23e66e06 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ja/utils.py b/lm_eval/tasks/global_mmlu/full/ja/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml new file mode 100644 index 00000000..d2225e23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ko +task: + - global_mmlu_full_ko_stem + - global_mmlu_full_ko_other + - global_mmlu_full_ko_social_sciences + - global_mmlu_full_ko_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml new file mode 100644 index 00000000..c7690643 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_humanities +task: + - global_mmlu_full_ko_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml new file mode 100644 index 00000000..8990ae95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_other +task: + - global_mmlu_full_ko_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml new file mode 100644 index 00000000..0bbfad7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_social_sciences +task: + - global_mmlu_full_ko_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml new file mode 100644 index 00000000..18b7f17b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_stem +task: + - global_mmlu_full_ko_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml b/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml new file mode 100644 index 00000000..11700a26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ko +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml new file mode 100644 index 00000000..5959d788 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml new file mode 100644 index 00000000..ebb90860 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ko_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml new file mode 100644 index 00000000..670846b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml new file mode 100644 index 00000000..1a44e430 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml new file mode 100644 index 00000000..e9e29697 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml new file mode 100644 index 00000000..fc364468 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml new file mode 100644 index 00000000..2eb0f416 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml new file mode 100644 index 00000000..044f1eff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml new file mode 100644 index 00000000..9929097c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml new file mode 100644 index 00000000..b78c24e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml new file mode 100644 index 00000000..20c3fb20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml new file mode 100644 index 00000000..1f954572 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml new file mode 100644 index 00000000..f7998975 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml new file mode 100644 index 00000000..79c35ed7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml new file mode 100644 index 00000000..1444a249 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml new file mode 100644 index 00000000..8bec91b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml new file mode 100644 index 00000000..1cf31092 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml new file mode 100644 index 00000000..2a5f7bd5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml new file mode 100644 index 00000000..bdaed574 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml new file mode 100644 index 00000000..193a064c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml new file mode 100644 index 00000000..2d2ad648 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml new file mode 100644 index 00000000..a48b602d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml new file mode 100644 index 00000000..cc9c20eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e86a27fa --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml new file mode 100644 index 00000000..4b947f7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml new file mode 100644 index 00000000..9184ad9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml new file mode 100644 index 00000000..50b6a150 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml new file mode 100644 index 00000000..974e3b03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml new file mode 100644 index 00000000..e617e8cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml new file mode 100644 index 00000000..1a010596 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml new file mode 100644 index 00000000..a696675d --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml new file mode 100644 index 00000000..eca86cbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml new file mode 100644 index 00000000..69e3a2df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml new file mode 100644 index 00000000..ed3e99fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml new file mode 100644 index 00000000..651f389c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml new file mode 100644 index 00000000..001807eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml new file mode 100644 index 00000000..01eec477 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml new file mode 100644 index 00000000..c1126c6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml new file mode 100644 index 00000000..3b833270 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_management diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml new file mode 100644 index 00000000..3cce25c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml new file mode 100644 index 00000000..65df1786 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml new file mode 100644 index 00000000..04b71e2a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ko_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml new file mode 100644 index 00000000..3f1e7fa1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml new file mode 100644 index 00000000..c657543a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml new file mode 100644 index 00000000..dff6450f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml new file mode 100644 index 00000000..21f058af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml new file mode 100644 index 00000000..56aedae9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml new file mode 100644 index 00000000..24f83b23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml new file mode 100644 index 00000000..ece9dc5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml new file mode 100644 
index 00000000..43930957 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml new file mode 100644 index 00000000..98ff6520 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml new file mode 100644 index 00000000..1a5b07f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml new file mode 100644 index 00000000..3663391a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ko_social_sciences_tasks +task: 
global_mmlu_full_ko_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml new file mode 100644 index 00000000..902b4443 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml new file mode 100644 index 00000000..36e1794c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml new file mode 100644 index 00000000..64b58d6a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_virology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml new file mode 100644 index 00000000..7289671f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ko/utils.py b/lm_eval/tasks/global_mmlu/full/ko/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml new file mode 100644 index 00000000..4774599a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ky +task: + - global_mmlu_full_ky_stem + - global_mmlu_full_ky_other + - global_mmlu_full_ky_social_sciences + - global_mmlu_full_ky_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml new file mode 100644 index 00000000..1e0368c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_humanities +task: + - global_mmlu_full_ky_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml new file mode 100644 index 00000000..1bfc89ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_other +task: + - global_mmlu_full_ky_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml new file mode 100644 index 00000000..3ae756c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_social_sciences +task: + - global_mmlu_full_ky_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml new file mode 100644 index 00000000..817456fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_stem +task: + - global_mmlu_full_ky_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml b/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml new file mode 100644 index 00000000..63f88823 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ky +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml new file mode 100644 index 00000000..21338a56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml new file mode 100644 index 00000000..df263548 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ky_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml new file mode 100644 index 00000000..5e0f6aba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml new file mode 100644 index 00000000..17656dc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml new file mode 100644 index 00000000..8c053b88 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml new file mode 100644 index 00000000..36492106 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml new file mode 100644 index 00000000..cb9f8586 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml new file mode 100644 index 00000000..e4b15b54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml new file mode 100644 index 00000000..f5657b66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml new file mode 100644 index 00000000..f1e0c25e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml new file mode 100644 index 00000000..fac1d80f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml new file mode 100644 index 00000000..e35718d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml new file mode 100644 index 00000000..f165ec61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml new file mode 100644 index 00000000..48670c7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml new file mode 100644 index 00000000..29d24142 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml new file mode 100644 index 00000000..9aa7f81b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml new file mode 100644 index 00000000..70a5bd86 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml new file mode 100644 index 00000000..f678c0d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml new file mode 100644 index 00000000..750bc68b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml new file mode 100644 index 00000000..7700e37f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml new file mode 100644 index 00000000..c805fc4e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml new file mode 100644 index 00000000..01c67f8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml new file mode 100644 index 00000000..ccc5c8b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml new file mode 100644 index 00000000..02ea66ef --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f693296d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml new file mode 100644 index 00000000..b05e2799 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml new file mode 100644 index 00000000..d596290f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml new file mode 100644 index 00000000..3f71865c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml new file mode 100644 index 00000000..635873a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml new file mode 100644 index 00000000..df8cfefb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml new file mode 100644 index 00000000..3c75f534 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml new file mode 100644 index 00000000..e73edcbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml new file mode 100644 index 00000000..f4e662a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml new file mode 100644 index 00000000..8c2556da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml new file mode 100644 index 00000000..2af16190 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml new file mode 100644 index 00000000..f0994cc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml new file mode 100644 index 00000000..f7933a77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml new file mode 100644 index 00000000..f6e525a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml new file mode 100644 index 00000000..03f70aa0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_management diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml new file mode 100644 index 00000000..72ced798 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml new file mode 100644 index 00000000..371e4b21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml new file mode 100644 index 00000000..e693ab8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ky_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml new file mode 100644 index 00000000..ccafcb1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml new file mode 100644 index 00000000..16c19b29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml new file mode 100644 index 00000000..f6c00cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml new file mode 100644 index 00000000..6d6d242b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml new file mode 100644 index 00000000..4ff2e08d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml new file mode 100644 index 00000000..37c6a892 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml new file mode 100644 index 00000000..9b4fea0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml new file mode 100644 
index 00000000..8a6ef0b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml new file mode 100644 index 00000000..dce1b6d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml new file mode 100644 index 00000000..168cae74 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml new file mode 100644 index 00000000..1e24b816 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ky_social_sciences_tasks +task: 
global_mmlu_full_ky_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml new file mode 100644 index 00000000..7d1ad959 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml new file mode 100644 index 00000000..36cd7e20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml new file mode 100644 index 00000000..e2a77915 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_virology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml new file mode 100644 index 00000000..563c1397 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ky/utils.py b/lm_eval/tasks/global_mmlu/full/ky/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml new file mode 100644 index 00000000..93929d42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_lt +task: + - global_mmlu_full_lt_stem + - global_mmlu_full_lt_other + - global_mmlu_full_lt_social_sciences + - global_mmlu_full_lt_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml new file mode 100644 index 00000000..48ad351f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_humanities +task: + - global_mmlu_full_lt_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml new file mode 100644 index 00000000..8f63c35a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_other +task: + - global_mmlu_full_lt_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml new file mode 100644 index 00000000..9ababd6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_social_sciences +task: + - global_mmlu_full_lt_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml new file mode 100644 index 00000000..1a59e683 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_stem +task: + - global_mmlu_full_lt_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml b/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml new file mode 100644 index 00000000..8b925338 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: lt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml new file mode 100644 index 00000000..76b96844 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml new file mode 100644 index 00000000..527c7107 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _lt_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml new file mode 100644 index 00000000..419b89e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml new file mode 100644 index 00000000..c51daa22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml new file mode 100644 index 00000000..e0232774 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml new file mode 100644 index 00000000..c6fea6f8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml new file mode 100644 index 00000000..93b9a561 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml new file mode 100644 index 00000000..8d0dcfdd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml new file mode 100644 index 00000000..8d33b747 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml new file mode 100644 index 00000000..ad74dbb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml new file mode 100644 index 00000000..3c69754b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml new file mode 100644 index 00000000..d78f3a54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml new file mode 100644 index 00000000..3e7b5e49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml new file mode 100644 index 00000000..6d0085ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml new file mode 100644 index 00000000..284dfe9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml new file mode 100644 index 00000000..7e9a0103 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml new file mode 100644 index 00000000..ec9a665b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml new file mode 100644 index 00000000..d81a9470 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml new file mode 100644 index 00000000..139376cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml new file mode 100644 index 00000000..87112d8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml new file mode 100644 index 00000000..2324bb28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml new file mode 100644 index 00000000..5f365fab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml new file mode 100644 index 00000000..e3a6f921 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml new file mode 100644 index 00000000..526b68ed --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml new file mode 100644 index 00000000..e14b1dce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml new file mode 100644 index 00000000..1cdf5c90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml new file mode 100644 index 00000000..a2f2a210 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml new file mode 100644 index 00000000..bd363709 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml new file mode 100644 index 00000000..aad65a13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml new file mode 100644 index 00000000..6dd6d699 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml new file mode 100644 index 00000000..5fb0ee1e --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml new file mode 100644 index 00000000..75f2769a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml new file mode 100644 index 00000000..beb27e9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml new file mode 100644 index 00000000..c9d952c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml new file mode 100644 index 00000000..f77adf9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_international_law diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml new file mode 100644 index 00000000..e6be84fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml new file mode 100644 index 00000000..ad597b27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml new file mode 100644 index 00000000..eb06a871 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml new file mode 100644 index 00000000..e1885ad3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_management diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml new file mode 100644 index 00000000..2dc83089 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_marketing diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml new file mode 100644 index 00000000..b67d321e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml new file mode 100644 index 00000000..2c744613 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_lt_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml new file mode 100644 index 00000000..09e6f044 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml new file mode 100644 index 00000000..bb8dd330 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml new file mode 100644 index 00000000..0b1a8556 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml new file mode 100644 index 00000000..aab1d556 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml new file mode 100644 index 00000000..ac93dd6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml new file mode 100644 index 00000000..6be78ec3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml new file mode 100644 index 00000000..60b6cdcc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml new file mode 100644 
index 00000000..dd899676 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml new file mode 100644 index 00000000..bd796e8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml new file mode 100644 index 00000000..3c6e5f39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml new file mode 100644 index 00000000..9eb9957d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_lt_social_sciences_tasks +task: 
global_mmlu_full_lt_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml new file mode 100644 index 00000000..2e17f95a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_sociology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml new file mode 100644 index 00000000..d39bb63c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml new file mode 100644 index 00000000..b8482a61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_virology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml new file mode 100644 index 00000000..a86af60d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/lt/utils.py b/lm_eval/tasks/global_mmlu/full/lt/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml new file mode 100644 index 00000000..05b55948 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_mg +task: + - global_mmlu_full_mg_stem + - global_mmlu_full_mg_other + - global_mmlu_full_mg_social_sciences + - global_mmlu_full_mg_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml new file mode 100644 index 00000000..76b08f6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_humanities +task: + - global_mmlu_full_mg_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml new file mode 100644 index 00000000..0006af4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_other +task: + - global_mmlu_full_mg_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml new file mode 100644 index 00000000..9cfe4f5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_social_sciences +task: + - global_mmlu_full_mg_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml new file mode 100644 index 00000000..bdc719d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_stem +task: + - global_mmlu_full_mg_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml b/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml new file mode 100644 index 00000000..4aa97b27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: mg +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml new file mode 100644 index 00000000..bea850ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml new file mode 100644 index 00000000..1cf6c116 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _mg_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml new file mode 100644 index 00000000..df582b27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml new file mode 100644 index 00000000..a6351342 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml new file mode 100644 index 00000000..21003af5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml new file mode 100644 index 00000000..d305ca94 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml new file mode 100644 index 00000000..7ccaffb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml new file mode 100644 index 00000000..248f72c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml new file mode 100644 index 00000000..fb817aae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml new file mode 100644 index 00000000..4fdbee03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml new file mode 100644 index 00000000..493bdf87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml new file mode 100644 index 00000000..c5054eb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml new file mode 100644 index 00000000..44a13a70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml new file mode 100644 index 00000000..2c5d029a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml new file mode 100644 index 00000000..1e5ece33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml new file mode 100644 index 00000000..4d62c758 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml new file mode 100644 index 00000000..e5dc67d0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml new file mode 100644 index 00000000..2712e9b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml new file mode 100644 index 00000000..c58957e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml new file mode 100644 index 00000000..707b7356 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml new file mode 100644 index 00000000..d7afd5a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml new file mode 100644 index 00000000..b6391ee4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml new file mode 100644 index 00000000..eb7014a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml new file mode 100644 index 00000000..74c5fc18 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml new file mode 100644 index 00000000..24631ff3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml new file mode 100644 index 00000000..b9db4a0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml new file mode 100644 index 00000000..f321b06a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml new file mode 100644 index 00000000..bc25971e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml new file mode 100644 index 00000000..42cc39a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml new file mode 100644 index 00000000..08cf8671 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml new file mode 100644 index 00000000..87314a57 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml new file mode 100644 index 00000000..c341a243 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml new file mode 100644 index 00000000..15375f9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml new file mode 100644 index 00000000..21419b9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml new file mode 100644 index 00000000..9d481339 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_international_law diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml new file mode 100644 index 00000000..f083a0ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml new file mode 100644 index 00000000..57e2e731 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml new file mode 100644 index 00000000..7609a09f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml new file mode 100644 index 00000000..becfe4b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_management diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml new file mode 100644 index 00000000..3765002b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_marketing diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml new file mode 100644 index 00000000..3f023ccd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml new file mode 100644 index 00000000..2993999d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_mg_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml new file mode 100644 index 00000000..fd430a0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml new file mode 100644 index 00000000..c1b16e86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml new file mode 100644 index 00000000..ab471f42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml new file mode 100644 index 00000000..f598830e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml new file mode 100644 index 00000000..330f1f52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml new file mode 100644 index 00000000..694118d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml new file mode 100644 index 00000000..fb6df92a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml new file mode 100644 
index 00000000..1de72b6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml new file mode 100644 index 00000000..f922e162 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml new file mode 100644 index 00000000..c829b89d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml new file mode 100644 index 00000000..362b4dbd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_mg_social_sciences_tasks +task: 
global_mmlu_full_mg_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml new file mode 100644 index 00000000..f0638cdb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_sociology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml new file mode 100644 index 00000000..8ead541a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml new file mode 100644 index 00000000..1ca09027 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_virology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml new file mode 100644 index 00000000..2bb64d70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/mg/utils.py b/lm_eval/tasks/global_mmlu/full/mg/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml new file mode 100644 index 00000000..e5a13645 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ms +task: + - global_mmlu_full_ms_stem + - global_mmlu_full_ms_other + - global_mmlu_full_ms_social_sciences + - global_mmlu_full_ms_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml new file mode 100644 index 00000000..0641187b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_humanities +task: + - global_mmlu_full_ms_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml new file mode 100644 index 00000000..3d14420c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_other +task: + - global_mmlu_full_ms_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml new file mode 100644 index 00000000..3db339d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_social_sciences +task: + - global_mmlu_full_ms_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml new file mode 100644 index 00000000..68908e16 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_stem +task: + - global_mmlu_full_ms_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml b/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml new file mode 100644 index 00000000..ba750264 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ms +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml new file mode 100644 index 00000000..ec791f2a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml new file mode 100644 index 00000000..35038bea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ms_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml new file mode 100644 index 00000000..79fdcbdd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml new file mode 100644 index 00000000..ffd6195a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml new file mode 100644 index 00000000..4c69b82e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml new file mode 100644 index 00000000..58219479 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml new file mode 100644 index 00000000..35514b83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml new file mode 100644 index 00000000..5e242b8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml new file mode 100644 index 00000000..07e10799 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml new file mode 100644 index 00000000..82822217 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml new file mode 100644 index 00000000..be20fa6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml new file mode 100644 index 00000000..2e886b50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml new file mode 100644 index 00000000..2a2fb6da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml new file mode 100644 index 00000000..efdffabf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml new file mode 100644 index 00000000..80eba2e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml new file mode 100644 index 00000000..1e6caf26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml new file mode 100644 index 00000000..59147662 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml new file mode 100644 index 00000000..6ac76cad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml new file mode 100644 index 00000000..6be8ccfe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml new file mode 100644 index 00000000..f01c29b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml new file mode 100644 index 00000000..b18e8cf8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml new file mode 100644 index 00000000..fdb41802 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml new file mode 100644 index 00000000..c4e44a60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml new file mode 100644 index 00000000..0ebbfe6f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f28f9a5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml new file mode 100644 index 00000000..50a2552d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml new file mode 100644 index 00000000..6747cd9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml new file mode 100644 index 00000000..aef3fee8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml new file mode 100644 index 00000000..3e8641e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml new file mode 100644 index 00000000..4aa7ba00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml new file mode 100644 index 00000000..e6d1faab --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml new file mode 100644 index 00000000..4caf7e54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml new file mode 100644 index 00000000..5b2b5c5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml new file mode 100644 index 00000000..2ddef17a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml new file mode 100644 index 00000000..61795f58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml new file mode 100644 index 00000000..f2e96706 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml new file mode 100644 index 00000000..1d142bde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml new file mode 100644 index 00000000..94724056 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml new file mode 100644 index 00000000..8ca04a13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_management diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml new file mode 100644 index 00000000..ec0e4462 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml new file mode 100644 index 00000000..0f2b1eec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml new file mode 100644 index 00000000..65da952e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ms_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml new file mode 100644 index 00000000..399035f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml new file mode 100644 index 00000000..3bc74baa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml new file mode 100644 index 00000000..300de677 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml new file mode 100644 index 00000000..8f6eceae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml new file mode 100644 index 00000000..4c624fec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml new file mode 100644 index 00000000..9a06e7f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml new file mode 100644 index 00000000..b3d5921a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml new file mode 100644 
index 00000000..0d9a58b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml new file mode 100644 index 00000000..7f51baec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml new file mode 100644 index 00000000..c07cbdee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml new file mode 100644 index 00000000..651cb72d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ms_social_sciences_tasks +task: 
global_mmlu_full_ms_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml new file mode 100644 index 00000000..5aeb7efa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml new file mode 100644 index 00000000..ecbf5705 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml new file mode 100644 index 00000000..fbdd5e25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_virology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml new file mode 100644 index 00000000..32b35029 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ms/utils.py b/lm_eval/tasks/global_mmlu/full/ms/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml new file mode 100644 index 00000000..ec13a0be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ne +task: + - global_mmlu_full_ne_stem + - global_mmlu_full_ne_other + - global_mmlu_full_ne_social_sciences + - global_mmlu_full_ne_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml new file mode 100644 index 00000000..fef749db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_humanities +task: + - global_mmlu_full_ne_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml new file mode 100644 index 00000000..0d3dfbd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_other +task: + - global_mmlu_full_ne_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml new file mode 100644 index 00000000..f1f09f00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_social_sciences +task: + - global_mmlu_full_ne_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml new file mode 100644 index 00000000..eebc1cac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_stem +task: + - global_mmlu_full_ne_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml b/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml new file mode 100644 index 00000000..25f8daec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ne +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml new file mode 100644 index 00000000..48bf7bb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml new file mode 100644 index 00000000..0f66f8ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ne_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml new file mode 100644 index 00000000..a02aaf30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml new file mode 100644 index 00000000..d87f5b98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml new file mode 100644 index 00000000..f27eb4e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml new file mode 100644 index 00000000..d26edef8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml new file mode 100644 index 00000000..88b8bd86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml new file mode 100644 index 00000000..51909ffc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml new file mode 100644 index 00000000..40b9cb79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml new file mode 100644 index 00000000..81f81f84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml new file mode 100644 index 00000000..09798c09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml new file mode 100644 index 00000000..49d89dd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml new file mode 100644 index 00000000..94bfec4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml new file mode 100644 index 00000000..81d6ed98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml new file mode 100644 index 00000000..73ad1a34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml new file mode 100644 index 00000000..cbc3bacd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml new file mode 100644 index 00000000..225da2fb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml new file mode 100644 index 00000000..6f5e9f1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml new file mode 100644 index 00000000..a8c0436a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml new file mode 100644 index 00000000..405661c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml new file mode 100644 index 00000000..6cff5ba6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml new file mode 100644 index 00000000..4f7eb3ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml new file mode 100644 index 00000000..0453e51d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml new file mode 100644 index 00000000..05710100 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fd68d5f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml new file mode 100644 index 00000000..39ef0a58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml new file mode 100644 index 00000000..535a3918 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml new file mode 100644 index 00000000..f355dad2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml new file mode 100644 index 00000000..a52d7a01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml new file mode 100644 index 00000000..5a256420 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml new file mode 100644 index 00000000..9e1199b1 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml new file mode 100644 index 00000000..afc2135b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml new file mode 100644 index 00000000..18450534 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml new file mode 100644 index 00000000..7d23b839 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml new file mode 100644 index 00000000..5be599d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml new file mode 100644 index 00000000..180a397c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml new file mode 100644 index 00000000..3aa369a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml new file mode 100644 index 00000000..4e08abda --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml new file mode 100644 index 00000000..e44c5be6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_management diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml new file mode 100644 index 00000000..10f7daa2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml new file mode 100644 index 00000000..8139b1f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml new file mode 100644 index 00000000..cb1bf905 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ne_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml new file mode 100644 index 00000000..1b74fb36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml new file mode 100644 index 00000000..91f8f06c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml new file mode 100644 index 00000000..575f0e45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml new file mode 100644 index 00000000..95fdd0eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml new file mode 100644 index 00000000..e6e5c706 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml new file mode 100644 index 00000000..718cedee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml new file mode 100644 index 00000000..89c70160 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml new file mode 100644 
index 00000000..a366e0c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml new file mode 100644 index 00000000..649e5343 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml new file mode 100644 index 00000000..37f2ddea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml new file mode 100644 index 00000000..55f80904 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ne_social_sciences_tasks +task: 
global_mmlu_full_ne_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml new file mode 100644 index 00000000..78161d5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml new file mode 100644 index 00000000..c38f59c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml new file mode 100644 index 00000000..0c15808f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_virology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml new file mode 100644 index 00000000..5c6163f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ne/utils.py b/lm_eval/tasks/global_mmlu/full/ne/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml new file mode 100644 index 00000000..44f562da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_nl +task: + - global_mmlu_full_nl_stem + - global_mmlu_full_nl_other + - global_mmlu_full_nl_social_sciences + - global_mmlu_full_nl_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml new file mode 100644 index 00000000..656a421b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_humanities +task: + - global_mmlu_full_nl_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml new file mode 100644 index 00000000..23a42201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_other +task: + - global_mmlu_full_nl_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml new file mode 100644 index 00000000..afba5678 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_social_sciences +task: + - global_mmlu_full_nl_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml new file mode 100644 index 00000000..9658b13e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_stem +task: + - global_mmlu_full_nl_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml b/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml new file mode 100644 index 00000000..39efbfd1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: nl +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml new file mode 100644 index 00000000..458a3614 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml new file mode 100644 index 00000000..e4cbd90e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _nl_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml new file mode 100644 index 00000000..84cdf578 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml new file mode 100644 index 00000000..f75776f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml new file mode 100644 index 00000000..6e963d0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml new file mode 100644 index 00000000..e4a3660b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml new file mode 100644 index 00000000..fa9faed9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml new file mode 100644 index 00000000..b603c309 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml new file mode 100644 index 00000000..f55207ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml new file mode 100644 index 00000000..5cdda1b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml new file mode 100644 index 00000000..26d70230 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml new file mode 100644 index 00000000..01a8a747 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml new file mode 100644 index 00000000..cccd2666 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml new file mode 100644 index 00000000..22ad59bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml new file mode 100644 index 00000000..3aca226f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml new file mode 100644 index 00000000..2118a1d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml new file mode 100644 index 00000000..5fd86105 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml new file mode 100644 index 00000000..d7147d51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml new file mode 100644 index 00000000..271b54f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml new file mode 100644 index 00000000..921abd17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml new file mode 100644 index 00000000..ea190bea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml new file mode 100644 index 00000000..c348d482 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml new file mode 100644 index 00000000..de31a63b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml new file mode 100644 index 00000000..bc0e3cb1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml new file mode 100644 index 00000000..2e221c68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml new file mode 100644 index 00000000..137158a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml new file mode 100644 index 00000000..27b426c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml new file mode 100644 index 00000000..746df49e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml new file mode 100644 index 00000000..89cb42d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml new file mode 100644 index 00000000..e27082c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml new file mode 100644 index 00000000..66efc58c --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml new file mode 100644 index 00000000..83b65345 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml new file mode 100644 index 00000000..82e00b4b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml new file mode 100644 index 00000000..468589da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml new file mode 100644 index 00000000..e5bf62a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_international_law diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml new file mode 100644 index 00000000..7b533613 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml new file mode 100644 index 00000000..de862b66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml new file mode 100644 index 00000000..c205af00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml new file mode 100644 index 00000000..5b624af8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_management diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml new file mode 100644 index 00000000..81658e9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_marketing diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml new file mode 100644 index 00000000..f8e52c0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml new file mode 100644 index 00000000..31af482e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_nl_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml new file mode 100644 index 00000000..853de0c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml new file mode 100644 index 00000000..8b86e045 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml new file mode 100644 index 00000000..96036dae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml new file mode 100644 index 00000000..84e827dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml new file mode 100644 index 00000000..f49c8a5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml new file mode 100644 index 00000000..45484116 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml new file mode 100644 index 00000000..17b28cd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml new file mode 100644 
index 00000000..f4db01bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml new file mode 100644 index 00000000..be586b45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml new file mode 100644 index 00000000..2ffe5848 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml new file mode 100644 index 00000000..b6c76948 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_nl_social_sciences_tasks +task: 
global_mmlu_full_nl_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml new file mode 100644 index 00000000..983e13cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_sociology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml new file mode 100644 index 00000000..bd6b6227 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml new file mode 100644 index 00000000..92d1973b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_virology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml new file mode 100644 index 00000000..a8c2ecca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/nl/utils.py b/lm_eval/tasks/global_mmlu/full/nl/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml new file mode 100644 index 00000000..c325bf1d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ny +task: + - global_mmlu_full_ny_stem + - global_mmlu_full_ny_other + - global_mmlu_full_ny_social_sciences + - global_mmlu_full_ny_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml new file mode 100644 index 00000000..89e7618f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_humanities +task: + - global_mmlu_full_ny_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml new file mode 100644 index 00000000..51b90446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_other +task: + - global_mmlu_full_ny_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml new file mode 100644 index 00000000..b711dfdf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_social_sciences +task: + - global_mmlu_full_ny_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml new file mode 100644 index 00000000..99bf9d95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_stem +task: + - global_mmlu_full_ny_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml b/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml new file mode 100644 index 00000000..069a9446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ny +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml new file mode 100644 index 00000000..2e3d7c33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml new file mode 100644 index 00000000..60806afc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ny_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml new file mode 100644 index 00000000..afbcb482 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml new file mode 100644 index 00000000..6f8981bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml new file mode 100644 index 00000000..ff44dd67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml new file mode 100644 index 00000000..da5ce370 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml new file mode 100644 index 00000000..d62bce83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml new file mode 100644 index 00000000..48cd98d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml new file mode 100644 index 00000000..ed77ba9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml new file mode 100644 index 00000000..9cd8aa2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml new file mode 100644 index 00000000..66d5dc27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml new file mode 100644 index 00000000..8a9dae62 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml new file mode 100644 index 00000000..8d160ffc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml new file mode 100644 index 00000000..88af709a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml new file mode 100644 index 00000000..d835f1e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml new file mode 100644 index 00000000..558ffd0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml new file mode 100644 index 00000000..cce0df19 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml new file mode 100644 index 00000000..6ce027a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml new file mode 100644 index 00000000..a729008d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml new file mode 100644 index 00000000..79771bfb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml new file mode 100644 index 00000000..6889806f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml new file mode 100644 index 00000000..29e6e4a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml new file mode 100644 index 00000000..447db75f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e543cf76 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml new file mode 100644 index 00000000..61c49e75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml new file mode 100644 index 00000000..db228d02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml new file mode 100644 index 00000000..62d87c86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml new file mode 100644 index 00000000..54c15d66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml new file mode 100644 index 00000000..4f7d8b5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml new file mode 100644 index 00000000..f53235b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml new file mode 100644 index 00000000..1d413b98 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml new file mode 100644 index 00000000..4adf2e8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml new file mode 100644 index 00000000..9660b7b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml new file mode 100644 index 00000000..11a6f2d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml new file mode 100644 index 00000000..9a46ff6a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml new file mode 100644 index 00000000..e4606df5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml new file mode 100644 index 00000000..6edade03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml new file mode 100644 index 00000000..765b2201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml new file mode 100644 index 00000000..a699a70d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_management diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml new file mode 100644 index 00000000..596d6937 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml new file mode 100644 index 00000000..4fae66a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml new file mode 100644 index 00000000..8555e173 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ny_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml new file mode 100644 index 00000000..b64f4d9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml new file mode 100644 index 00000000..c73f9f1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml new file mode 100644 index 00000000..456f4cb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml new file mode 100644 index 00000000..d0e0e05e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml new file mode 100644 index 00000000..d65c6be1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml new file mode 100644 index 00000000..c152c80e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml new file mode 100644 index 00000000..d5e2c7b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml new file mode 100644 
index 00000000..cacd5df7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml new file mode 100644 index 00000000..ffdd86d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml new file mode 100644 index 00000000..0e6b5ab8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml new file mode 100644 index 00000000..f894fdd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ny_social_sciences_tasks +task: 
global_mmlu_full_ny_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml new file mode 100644 index 00000000..1d2d0cd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml new file mode 100644 index 00000000..a72a237d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml new file mode 100644 index 00000000..9eeb7cf0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_virology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml new file mode 100644 index 00000000..a1c243c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ny/utils.py b/lm_eval/tasks/global_mmlu/full/ny/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml new file mode 100644 index 00000000..2476fd33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_pl +task: + - global_mmlu_full_pl_stem + - global_mmlu_full_pl_other + - global_mmlu_full_pl_social_sciences + - global_mmlu_full_pl_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml new file mode 100644 index 00000000..4b5f7aa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_humanities +task: + - global_mmlu_full_pl_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml new file mode 100644 index 00000000..241dbc1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_other +task: + - global_mmlu_full_pl_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml new file mode 100644 index 00000000..9a50a315 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_social_sciences +task: + - global_mmlu_full_pl_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml new file mode 100644 index 00000000..3d11c89f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_stem +task: + - global_mmlu_full_pl_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml b/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml new file mode 100644 index 00000000..af8809dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: pl +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml new file mode 100644 index 00000000..37f611a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml new file mode 100644 index 00000000..c274bce1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _pl_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml new file mode 100644 index 00000000..99220f0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml new file mode 100644 index 00000000..10592668 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml new file mode 100644 index 00000000..29a4fadc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml new file mode 100644 index 00000000..cce1671c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml new file mode 100644 index 00000000..79c63530 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml new file mode 100644 index 00000000..bb630140 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml new file mode 100644 index 00000000..6b42f767 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml new file mode 100644 index 00000000..43bea976 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml new file mode 100644 index 00000000..0c9ea601 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml new file mode 100644 index 00000000..365b60a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml new file mode 100644 index 00000000..2b9437e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml new file mode 100644 index 00000000..648f24c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml new file mode 100644 index 00000000..196de258 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml new file mode 100644 index 00000000..8646b6a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml new file mode 100644 index 00000000..2d13d283 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml new file mode 100644 index 00000000..15bb640b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml new file mode 100644 index 00000000..ba964028 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml new file mode 100644 index 00000000..7f142dd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml new file mode 100644 index 00000000..99b3b9da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml new file mode 100644 index 00000000..e99b2fb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml new file mode 100644 index 00000000..bc6113f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml new file mode 100644 index 00000000..05a7de9b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml new file mode 100644 index 00000000..aceda633 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml new file mode 100644 index 00000000..6eef2cd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml new file mode 100644 index 00000000..5adb5fa1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml new file mode 100644 index 00000000..fbda7920 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml new file mode 100644 index 00000000..7eb09362 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml new file mode 100644 index 00000000..b7beef5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml new file mode 100644 index 00000000..08f45dd9 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml new file mode 100644 index 00000000..99664de8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml new file mode 100644 index 00000000..d63f6f8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml new file mode 100644 index 00000000..8080ca8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml new file mode 100644 index 00000000..425695c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_international_law diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml new file mode 100644 index 00000000..a6455bd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml new file mode 100644 index 00000000..f1359b3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml new file mode 100644 index 00000000..3d7bb0dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml new file mode 100644 index 00000000..f695226c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_management diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml new file mode 100644 index 00000000..7fedcd3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_marketing diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml new file mode 100644 index 00000000..89da9f67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml new file mode 100644 index 00000000..6f34762c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_pl_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml new file mode 100644 index 00000000..25f201f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml new file mode 100644 index 00000000..fd08e6e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml new file mode 100644 index 00000000..b61f1f17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml new file mode 100644 index 00000000..8c1bf6dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml new file mode 100644 index 00000000..e5329e13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml new file mode 100644 index 00000000..514b04cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml new file mode 100644 index 00000000..99c719f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml new file mode 100644 
index 00000000..1dfafb25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml new file mode 100644 index 00000000..5b6181c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml new file mode 100644 index 00000000..acf874db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml new file mode 100644 index 00000000..d754904c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_pl_social_sciences_tasks +task: 
global_mmlu_full_pl_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml new file mode 100644 index 00000000..4bc0fd8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_sociology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml new file mode 100644 index 00000000..ef719be1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml new file mode 100644 index 00000000..f9084c13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_virology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml new file mode 100644 index 00000000..036d0f4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/pl/utils.py b/lm_eval/tasks/global_mmlu/full/pl/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml new file mode 100644 index 00000000..ac79bda1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_pt +task: + - global_mmlu_full_pt_stem + - global_mmlu_full_pt_other + - global_mmlu_full_pt_social_sciences + - global_mmlu_full_pt_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml new file mode 100644 index 00000000..261a7028 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_humanities +task: + - global_mmlu_full_pt_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml new file mode 100644 index 00000000..a61b12f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_other +task: + - global_mmlu_full_pt_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml new file mode 100644 index 00000000..2c04bf5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_social_sciences +task: + - global_mmlu_full_pt_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml new file mode 100644 index 00000000..dc3d3610 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_stem +task: + - global_mmlu_full_pt_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml b/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml new file mode 100644 index 00000000..66ba2417 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: pt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml new file mode 100644 index 00000000..d9efd817 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml new file mode 100644 index 00000000..45390503 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _pt_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml new file mode 100644 index 00000000..90880cd0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml new file mode 100644 index 00000000..f18ef2d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml new file mode 100644 index 00000000..2999a02a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml new file mode 100644 index 00000000..0cf0a61b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml new file mode 100644 index 00000000..91d8cd2e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml new file mode 100644 index 00000000..68592aaf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml new file mode 100644 index 00000000..31d7f6af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml new file mode 100644 index 00000000..46ec8232 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml new file mode 100644 index 00000000..2cf6402d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml new file mode 100644 index 00000000..0953a105 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml new file mode 100644 index 00000000..0e6e91a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml new file mode 100644 index 00000000..67c29915 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml new file mode 100644 index 00000000..5a6ba82e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml new file mode 100644 index 00000000..3d66a664 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml new file mode 100644 index 00000000..683d6ddd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml new file mode 100644 index 00000000..e4396542 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml new file mode 100644 index 00000000..89fefd1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml new file mode 100644 index 00000000..ea323d8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml new file mode 100644 index 00000000..5f8f0082 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml new file mode 100644 index 00000000..bef7a316 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml new file mode 100644 index 00000000..e69c2978 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e3fa920d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml new file mode 100644 index 00000000..6b7ca2f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml new file mode 100644 index 00000000..4713674d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml new file mode 100644 index 00000000..d6475e99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml new file mode 100644 index 00000000..9eaed31a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml new file mode 100644 index 00000000..d09e1eb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml new file mode 100644 index 00000000..3d8c1447 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml new file mode 100644 index 00000000..a883b438 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml new file mode 100644 index 00000000..6ea1454e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml new file mode 100644 index 00000000..34033c55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml new file mode 100644 index 00000000..bf961c33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml new file mode 100644 index 00000000..5247fc9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_international_law diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml new file mode 100644 index 00000000..07e78da5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml new file mode 100644 index 00000000..c2451399 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml new file mode 100644 index 00000000..79c577ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml new file mode 100644 index 00000000..a344b1c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_management diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml new file mode 100644 index 00000000..eeff36b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_marketing diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml new file mode 100644 index 00000000..27985380 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml new file mode 100644 index 00000000..e2fa1da1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_pt_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml new file mode 100644 index 00000000..e83d186e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml new file mode 100644 index 00000000..3529a15c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml new file mode 100644 index 00000000..e51eefe0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml new file mode 100644 index 00000000..ec0826b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml new file mode 100644 index 00000000..324dfe69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml new file mode 100644 index 00000000..530c918e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml new file mode 100644 index 00000000..f7a3679c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml new file mode 100644 
index 00000000..0f4cc006 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml new file mode 100644 index 00000000..4c5884c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml new file mode 100644 index 00000000..bb2d6536 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml new file mode 100644 index 00000000..1af8d662 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_pt_social_sciences_tasks +task: 
global_mmlu_full_pt_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml new file mode 100644 index 00000000..3ef8fcb7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_sociology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml new file mode 100644 index 00000000..8b48f528 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml new file mode 100644 index 00000000..4b0de753 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_virology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml new file mode 100644 index 00000000..79648586 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/pt/utils.py b/lm_eval/tasks/global_mmlu/full/pt/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml new file mode 100644 index 00000000..b3aa5f49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ro +task: + - global_mmlu_full_ro_stem + - global_mmlu_full_ro_other + - global_mmlu_full_ro_social_sciences + - global_mmlu_full_ro_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml new file mode 100644 index 00000000..d54268b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_humanities +task: + - global_mmlu_full_ro_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml new file mode 100644 index 00000000..4e58aea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_other +task: + - global_mmlu_full_ro_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml new file mode 100644 index 00000000..e1cb84a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_social_sciences +task: + - global_mmlu_full_ro_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml new file mode 100644 index 00000000..de0e406f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_stem +task: + - global_mmlu_full_ro_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml b/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml new file mode 100644 index 00000000..e5cb6dd0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ro +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml new file mode 100644 index 00000000..c505fb8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml new file mode 100644 index 00000000..0c13018c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ro_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml new file mode 100644 index 00000000..9f4caefb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml new file mode 100644 index 00000000..1c1387fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml new file mode 100644 index 00000000..b9e0dbb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml new file mode 100644 index 00000000..5bf14ab0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml new file mode 100644 index 00000000..59034744 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml new file mode 100644 index 00000000..6bb64c2e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml new file mode 100644 index 00000000..d719a5ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml new file mode 100644 index 00000000..c9284a8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml new file mode 100644 index 00000000..1d27d843 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml new file mode 100644 index 00000000..1d63556e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml new file mode 100644 index 00000000..25f30a36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml new file mode 100644 index 00000000..1fa6b5d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml new file mode 100644 index 00000000..f6eb4b6e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml new file mode 100644 index 00000000..e99772e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml new file mode 100644 index 00000000..be99bd00 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml new file mode 100644 index 00000000..819937e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml new file mode 100644 index 00000000..d7509581 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml new file mode 100644 index 00000000..d089583f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml new file mode 100644 index 00000000..46d5f472 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml new file mode 100644 index 00000000..1a1ae7e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml new file mode 100644 index 00000000..92935be5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml new file mode 100644 index 00000000..efd2a03f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fe2f97d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml new file mode 100644 index 00000000..f0432a01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml new file mode 100644 index 00000000..507fab86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml new file mode 100644 index 00000000..19a76707 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml new file mode 100644 index 00000000..d27fc262 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml new file mode 100644 index 00000000..8f8023bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml new file mode 100644 index 00000000..acc5fc41 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml new file mode 100644 index 00000000..9ea7c933 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml new file mode 100644 index 00000000..6b984c55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml new file mode 100644 index 00000000..e2af2cbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml new file mode 100644 index 00000000..1cbf3d03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml new file mode 100644 index 00000000..d0acaca0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml new file mode 100644 index 00000000..c84234a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml new file mode 100644 index 00000000..09237c9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml new file mode 100644 index 00000000..fcb3f485 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_management diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml new file mode 100644 index 00000000..33b486c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml new file mode 100644 index 00000000..09c3d5e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml new file mode 100644 index 00000000..e744e1e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ro_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml new file mode 100644 index 00000000..4e6d4ed7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml new file mode 100644 index 00000000..d0e99149 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml new file mode 100644 index 00000000..850262c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml new file mode 100644 index 00000000..9dd2bf54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml new file mode 100644 index 00000000..b2ecf40d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml new file mode 100644 index 00000000..db259766 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml new file mode 100644 index 00000000..b1e43974 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml new file mode 100644 
index 00000000..0158c545 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml new file mode 100644 index 00000000..bdd7ca7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml new file mode 100644 index 00000000..5f7f0f51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml new file mode 100644 index 00000000..be9b334e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ro_social_sciences_tasks +task: 
global_mmlu_full_ro_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml new file mode 100644 index 00000000..f37228bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml new file mode 100644 index 00000000..aae05dc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml new file mode 100644 index 00000000..2d789c20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_virology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml new file mode 100644 index 00000000..40ff8228 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ro/utils.py b/lm_eval/tasks/global_mmlu/full/ro/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml new file mode 100644 index 00000000..cc63cd34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ru +task: + - global_mmlu_full_ru_stem + - global_mmlu_full_ru_other + - global_mmlu_full_ru_social_sciences + - global_mmlu_full_ru_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml new file mode 100644 index 00000000..55422b43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_humanities +task: + - global_mmlu_full_ru_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml new file mode 100644 index 00000000..d47ccc60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_other +task: + - global_mmlu_full_ru_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml new file mode 100644 index 00000000..12d48428 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_social_sciences +task: + - global_mmlu_full_ru_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml new file mode 100644 index 00000000..70ae3edb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_stem +task: + - global_mmlu_full_ru_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml b/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml new file mode 100644 index 00000000..4b2f491b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ru +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml new file mode 100644 index 00000000..de158df8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml new file mode 100644 index 00000000..aab717e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ru_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml new file mode 100644 index 00000000..3d8d0e32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml new file mode 100644 index 00000000..d2855ca3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml new file mode 100644 index 00000000..2efe0829 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml new file mode 100644 index 00000000..96d00deb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml new file mode 100644 index 00000000..0a5aac35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml new file mode 100644 index 00000000..bd8bf28b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml new file mode 100644 index 00000000..a2e080c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml new file mode 100644 index 00000000..70e8448e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml new file mode 100644 index 00000000..8e6ecbcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml new file mode 100644 index 00000000..f196351a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml new file mode 100644 index 00000000..e623d78f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml new file mode 100644 index 00000000..df35a1f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml new file mode 100644 index 00000000..82c49f89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml new file mode 100644 index 00000000..6ed11c5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml new file mode 100644 index 00000000..8ebe62bf --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml new file mode 100644 index 00000000..27d6ad70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml new file mode 100644 index 00000000..7860e73e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml new file mode 100644 index 00000000..7596daa3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml new file mode 100644 index 00000000..ecb64d52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml new file mode 100644 index 00000000..92feccc5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml new file mode 100644 index 00000000..6f586f50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml new file mode 100644 index 00000000..0ffc85df --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml new file mode 100644 index 00000000..5da13204 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml new file mode 100644 index 00000000..fc684975 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml new file mode 100644 index 00000000..84887d18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml new file mode 100644 index 00000000..29ddf5bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml new file mode 100644 index 00000000..a0680bad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml new file mode 100644 index 00000000..07ac341b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml new file mode 100644 index 00000000..18e12bcd --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml new file mode 100644 index 00000000..c37522a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml new file mode 100644 index 00000000..cbd6bf32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml new file mode 100644 index 00000000..8766c348 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml new file mode 100644 index 00000000..4edbb98c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml new file mode 100644 index 00000000..24cea632 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml new file mode 100644 index 00000000..3160fadc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml new file mode 100644 index 00000000..b8e480e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml new file mode 100644 index 00000000..4a7b77a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_management diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml new file mode 100644 index 00000000..c71a4f29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml new file mode 100644 index 00000000..ac34ba20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml new file mode 100644 index 00000000..6049ccb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ru_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml new file mode 100644 index 00000000..d974ccfa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml new file mode 100644 index 00000000..f05f7de9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml new file mode 100644 index 00000000..59cc8dee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml new file mode 100644 index 00000000..eb78b1f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml new file mode 100644 index 00000000..685bb2a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml new file mode 100644 index 00000000..35c21255 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml new file mode 100644 index 00000000..ce70d006 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml new file mode 100644 
index 00000000..cce88d1d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml new file mode 100644 index 00000000..39fc8953 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml new file mode 100644 index 00000000..3dfd71cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml new file mode 100644 index 00000000..bd08ea34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ru_social_sciences_tasks +task: 
global_mmlu_full_ru_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml new file mode 100644 index 00000000..ef616ee1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml new file mode 100644 index 00000000..c8244e65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml new file mode 100644 index 00000000..2f4df810 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_virology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml new file mode 100644 index 00000000..06f71986 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ru/utils.py b/lm_eval/tasks/global_mmlu/full/ru/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml new file mode 100644 index 00000000..4deed570 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_si +task: + - global_mmlu_full_si_stem + - global_mmlu_full_si_other + - global_mmlu_full_si_social_sciences + - global_mmlu_full_si_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml new file mode 100644 index 00000000..b97994d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_humanities +task: + - global_mmlu_full_si_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml new file mode 100644 index 00000000..e7600ca4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_other +task: + - global_mmlu_full_si_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml new file mode 100644 index 00000000..4e2351a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_social_sciences +task: + - global_mmlu_full_si_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml new file mode 100644 index 00000000..8878bf80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_stem +task: + - global_mmlu_full_si_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml b/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml new file mode 100644 index 00000000..5c775b20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: si +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml new file mode 100644 index 00000000..b81c5803 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml new file mode 100644 index 00000000..32315245 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _si_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml new file mode 100644 index 00000000..c7ab9539 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml new file mode 100644 index 00000000..8281fc42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml new file mode 100644 index 00000000..2a7f5cf5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml new file mode 100644 index 00000000..e54148da --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml new file mode 100644 index 00000000..b797ac60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml new file mode 100644 index 00000000..ba69de35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml new file mode 100644 index 00000000..65ed9424 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml new file mode 100644 index 00000000..1418aa0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml new file mode 100644 index 00000000..cb32cd4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml new file mode 100644 index 00000000..ce5ab9b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml new file mode 100644 index 00000000..c2ab5718 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml new file mode 100644 index 00000000..5e764903 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml new file mode 100644 index 00000000..99679bb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml new file mode 100644 index 00000000..553bc9bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml new file mode 100644 index 00000000..112814b6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml new file mode 100644 index 00000000..008b5537 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml new file mode 100644 index 00000000..fecd995a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml new file mode 100644 index 00000000..3d3018b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml new file mode 100644 index 00000000..e80a1f2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml new file mode 100644 index 00000000..10e15738 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml new file mode 100644 index 00000000..12d90b97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml new file mode 100644 index 00000000..d285c2c6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml new file mode 100644 index 00000000..1c85f2df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml new file mode 100644 index 00000000..b292fa50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml new file mode 100644 index 00000000..ada74f5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml new file mode 100644 index 00000000..84bbda28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml new file mode 100644 index 00000000..7c378798 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml new file mode 100644 index 00000000..13758f22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml new file mode 100644 index 00000000..0fe85e14 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml new file mode 100644 index 00000000..8afaa392 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml new file mode 100644 index 00000000..2cf69a68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml new file mode 100644 index 00000000..418927d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml new file mode 100644 index 00000000..de0a611d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_international_law diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml new file mode 100644 index 00000000..10212173 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml new file mode 100644 index 00000000..d31372ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml new file mode 100644 index 00000000..0e3d0e7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml new file mode 100644 index 00000000..f4e29c9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_management diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml new file mode 100644 index 00000000..8dff414a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_marketing diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml new file mode 100644 index 00000000..6160f02b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml new file mode 100644 index 00000000..de1db6c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_si_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml new file mode 100644 index 00000000..d48cf75c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml new file mode 100644 index 00000000..5d08b811 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml new file mode 100644 index 00000000..3163db49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml new file mode 100644 index 00000000..f809bddd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml new file mode 100644 index 00000000..964e6ab7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml new file mode 100644 index 00000000..c04e0bbc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml new file mode 100644 index 00000000..6542f14e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml new file mode 100644 
index 00000000..38448979 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml new file mode 100644 index 00000000..80f36885 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml new file mode 100644 index 00000000..2ac5169e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml new file mode 100644 index 00000000..21423506 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_si_social_sciences_tasks +task: 
global_mmlu_full_si_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml new file mode 100644 index 00000000..c86ee0a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_sociology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml new file mode 100644 index 00000000..28c238e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml new file mode 100644 index 00000000..a1935460 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_virology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml new file mode 100644 index 00000000..424c23c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/si/utils.py b/lm_eval/tasks/global_mmlu/full/si/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml new file mode 100644 index 00000000..98ced987 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sn +task: + - global_mmlu_full_sn_stem + - global_mmlu_full_sn_other + - global_mmlu_full_sn_social_sciences + - global_mmlu_full_sn_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml new file mode 100644 index 00000000..69690862 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_humanities +task: + - global_mmlu_full_sn_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml new file mode 100644 index 00000000..18e750b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_other +task: + - global_mmlu_full_sn_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml new file mode 100644 index 00000000..a8e76215 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_social_sciences +task: + - global_mmlu_full_sn_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml new file mode 100644 index 00000000..b3136233 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_stem +task: + - global_mmlu_full_sn_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml b/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml new file mode 100644 index 00000000..30d50ba0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml new file mode 100644 index 00000000..c4de495e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml new file mode 100644 index 00000000..1ef227aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _sn_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml new file mode 100644 index 00000000..8662ab96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml new file mode 100644 index 00000000..6f4741c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml new file mode 100644 index 00000000..7477170e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml new file mode 100644 index 00000000..6d0ec277 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml new file mode 100644 index 00000000..9f0c4f42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml new file mode 100644 index 00000000..c8651ee1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml new file mode 100644 index 00000000..c1d1a98e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml new file mode 100644 index 00000000..d9ce08f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml new file mode 100644 index 00000000..ae34a82a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml new file mode 100644 index 00000000..4b41c175 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml new file mode 100644 index 00000000..5aaa8a78 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml new file mode 100644 index 00000000..8606e96c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml new file mode 100644 index 00000000..9c57f703 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml new file mode 100644 index 00000000..0ed5b400 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml new file mode 100644 index 00000000..55dafc2b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml new file mode 100644 index 00000000..5b8ee96f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml new file mode 100644 index 00000000..2597a7d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml new file mode 100644 index 00000000..1e6be4e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml new file mode 100644 index 00000000..446da912 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml new file mode 100644 index 00000000..dd8cf61c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml new file mode 100644 index 00000000..2e178adf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml new file mode 100644 index 00000000..1ac4efda --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml new file mode 100644 index 00000000..23ca0b41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml new file mode 100644 index 00000000..0bd9be19 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml new file mode 100644 index 00000000..916e14ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml new file mode 100644 index 00000000..b6a3e60c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml new file mode 100644 index 00000000..62a197c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml new file mode 100644 index 00000000..815cb60b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml new file mode 100644 index 00000000..ff9f970e --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml new file mode 100644 index 00000000..b2dedc38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml new file mode 100644 index 00000000..0ef13930 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml new file mode 100644 index 00000000..a52c2ded --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml new file mode 100644 index 00000000..648c3dea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml new file mode 100644 index 00000000..ca63c411 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml new file mode 100644 index 00000000..d74a7f18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml new file mode 100644 index 00000000..db272b3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml new file mode 100644 index 00000000..db3bee4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_management diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml new file mode 100644 index 00000000..a700c4e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml new file mode 100644 index 00000000..b826b187 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml new file mode 100644 index 00000000..dea895aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_sn_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml new file mode 100644 index 00000000..b641f6b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml new file mode 100644 index 00000000..2951a953 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml new file mode 100644 index 00000000..9816d8b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml new file mode 100644 index 00000000..4ea10505 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml new file mode 100644 index 00000000..e941437b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml new file mode 100644 index 00000000..057a197d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml new file mode 100644 index 00000000..72c9fac7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml new file mode 100644 
index 00000000..e727b3cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml new file mode 100644 index 00000000..341322d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml new file mode 100644 index 00000000..5448baa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml new file mode 100644 index 00000000..542c709a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sn_social_sciences_tasks +task: 
global_mmlu_full_sn_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml new file mode 100644 index 00000000..f2913db5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml new file mode 100644 index 00000000..ad476847 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml new file mode 100644 index 00000000..254fedb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_virology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml new file mode 100644 index 00000000..2aef6dfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sn/utils.py b/lm_eval/tasks/global_mmlu/full/sn/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml new file mode 100644 index 00000000..014a4121 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_so +task: + - global_mmlu_full_so_stem + - global_mmlu_full_so_other + - global_mmlu_full_so_social_sciences + - global_mmlu_full_so_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml new file mode 100644 index 00000000..ff78bfab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_humanities +task: + - global_mmlu_full_so_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml new file mode 100644 index 00000000..eec8e661 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_other +task: + - global_mmlu_full_so_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml new file mode 100644 index 00000000..9d00ea1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_social_sciences +task: + - global_mmlu_full_so_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml new file mode 100644 index 00000000..497b9b01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_stem +task: + - global_mmlu_full_so_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml b/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml new file mode 100644 index 00000000..fb052a63 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: so +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml new file mode 100644 index 00000000..afb5d908 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml new file mode 100644 index 00000000..79f3446d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _so_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml new file mode 100644 index 00000000..54a2faa0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml new file mode 100644 index 00000000..65bc598c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml new file mode 100644 index 00000000..224aa39b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml new file mode 100644 index 00000000..758d22c3 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml new file mode 100644 index 00000000..35c22430 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml new file mode 100644 index 00000000..86428ae8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml new file mode 100644 index 00000000..f9957a23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml new file mode 100644 index 00000000..f51a1b12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml new file mode 100644 index 00000000..43388d6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml new file mode 100644 index 00000000..8a556330 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml new file mode 100644 index 00000000..97dfa147 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml new file mode 100644 index 00000000..9792659f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml new file mode 100644 index 00000000..3ed44e41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml new file mode 100644 index 00000000..76628481 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml new file mode 100644 index 00000000..4b7645c8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml new file mode 100644 index 00000000..fa75e666 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml new file mode 100644 index 00000000..d3ad29d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml new file mode 100644 index 00000000..274af23b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml new file mode 100644 index 00000000..6bce30d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml new file mode 100644 index 00000000..cfc44f08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml new file mode 100644 index 00000000..55479c39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml new file mode 100644 index 00000000..ceb5a701 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml new file mode 100644 index 00000000..0c403ec5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml new file mode 100644 index 00000000..e8089bdf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml new file mode 100644 index 00000000..32cacffe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml new file mode 100644 index 00000000..fd2c35ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml new file mode 100644 index 00000000..26f2cb3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml new file mode 100644 index 00000000..730075b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml new file mode 100644 index 00000000..c9702a66 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml new file mode 100644 index 00000000..78a21d5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml new file mode 100644 index 00000000..c95b5562 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml new file mode 100644 index 00000000..632778d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml new file mode 100644 index 00000000..2d5ab1c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_international_law diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml new file mode 100644 index 00000000..1372a1d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml new file mode 100644 index 00000000..19a1120e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml new file mode 100644 index 00000000..c1e13dda --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml new file mode 100644 index 00000000..6e325205 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_management diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml new file mode 100644 index 00000000..8b1c002f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_marketing diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml new file mode 100644 index 00000000..c0136dc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml new file mode 100644 index 00000000..2b8a33ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_so_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml new file mode 100644 index 00000000..c1bd0011 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml new file mode 100644 index 00000000..60418a65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml new file mode 100644 index 00000000..5aa40241 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml new file mode 100644 index 00000000..421a9801 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml new file mode 100644 index 00000000..721bfbf2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml new file mode 100644 index 00000000..4ca0c5c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml new file mode 100644 index 00000000..7f57b594 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml new file mode 100644 
index 00000000..a7d6408e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml new file mode 100644 index 00000000..a03de5bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml new file mode 100644 index 00000000..f7af81e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml new file mode 100644 index 00000000..b52ee259 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_so_social_sciences_tasks +task: 
global_mmlu_full_so_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml new file mode 100644 index 00000000..7f3847e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_sociology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml new file mode 100644 index 00000000..a6017167 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml new file mode 100644 index 00000000..2dc85b32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_virology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml new file mode 100644 index 00000000..9ca99e5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/so/utils.py b/lm_eval/tasks/global_mmlu/full/so/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml new file mode 100644 index 00000000..e322d980 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sr +task: + - global_mmlu_full_sr_stem + - global_mmlu_full_sr_other + - global_mmlu_full_sr_social_sciences + - global_mmlu_full_sr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml new file mode 100644 index 00000000..080bc545 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_humanities +task: + - global_mmlu_full_sr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml new file mode 100644 index 00000000..9f0735eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_other +task: + - global_mmlu_full_sr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml new file mode 100644 index 00000000..bdc29d1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_social_sciences +task: + - global_mmlu_full_sr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml new file mode 100644 index 00000000..7c4aa636 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_stem +task: + - global_mmlu_full_sr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml b/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml new file mode 100644 index 00000000..6af61b3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml new file mode 100644 index 00000000..b3275870 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml new file mode 100644 index 00000000..5689af73 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _sr_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml new file mode 100644 index 00000000..3d23a438 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml new file mode 100644 index 00000000..e89f5e61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml new file mode 100644 index 00000000..b5611c15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml new file mode 100644 index 00000000..9e28c303 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml new file mode 100644 index 00000000..1eac952c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml new file mode 100644 index 00000000..e1146aa1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml new file mode 100644 index 00000000..bcfda2ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml new file mode 100644 index 00000000..3beb5b26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml new file mode 100644 index 00000000..f959a02f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml new file mode 100644 index 00000000..7e8761e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml new file mode 100644 index 00000000..9325f6de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml new file mode 100644 index 00000000..cc4a5bcc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml new file mode 100644 index 00000000..d3a5a78b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml new file mode 100644 index 00000000..50f60166 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml new file mode 100644 index 00000000..8bdd854f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml new file mode 100644 index 00000000..88862d21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml new file mode 100644 index 00000000..8f2b2952 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml new file mode 100644 index 00000000..6b89deb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml new file mode 100644 index 00000000..55fd7e8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml new file mode 100644 index 00000000..946acf0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml new file mode 100644 index 00000000..07058971 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml new file mode 100644 index 00000000..a9721c9b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fedea95a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml new file mode 100644 index 00000000..dca9e140 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml new file mode 100644 index 00000000..b01276f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml new file mode 100644 index 00000000..f549f8ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml new file mode 100644 index 00000000..c6b31eee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml new file mode 100644 index 00000000..12d0f0e5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml new file mode 100644 index 00000000..98c40100 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml new file mode 100644 index 00000000..76e6b45c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml new file mode 100644 index 00000000..b0ff1d95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml new file mode 100644 index 00000000..73a30099 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml new file mode 100644 index 00000000..0aea0826 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml new file mode 100644 index 00000000..debe604f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml new file mode 100644 index 00000000..407417f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml new file mode 100644 index 00000000..513a7f87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml new file mode 100644 index 00000000..fca9de04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_management diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml new file mode 100644 index 00000000..8267563e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml new file mode 100644 index 00000000..4ba860f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml new file mode 100644 index 00000000..ecdbcea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_sr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml new file mode 100644 index 00000000..54bf3491 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml new file mode 100644 index 00000000..2eab8d4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml new file mode 100644 index 00000000..83e1b84c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml new file mode 100644 index 00000000..654ee86b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml new file mode 100644 index 00000000..3a2f944b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml new file mode 100644 index 00000000..648ae0cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml new file mode 100644 index 00000000..0ee8a831 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml new file mode 100644 
index 00000000..3b142115 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml new file mode 100644 index 00000000..19e2dc54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml new file mode 100644 index 00000000..043024c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml new file mode 100644 index 00000000..24720925 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sr_social_sciences_tasks +task: 
global_mmlu_full_sr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml new file mode 100644 index 00000000..fc93c5e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml new file mode 100644 index 00000000..1b338dd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml new file mode 100644 index 00000000..b07588ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_virology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml new file mode 100644 index 00000000..3f78403e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sr/utils.py b/lm_eval/tasks/global_mmlu/full/sr/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml new file mode 100644 index 00000000..a9b0dc1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sv +task: + - global_mmlu_full_sv_stem + - global_mmlu_full_sv_other + - global_mmlu_full_sv_social_sciences + - global_mmlu_full_sv_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml new file mode 100644 index 00000000..f8b4628f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_humanities +task: + - global_mmlu_full_sv_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml new file mode 100644 index 00000000..1b29ca13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_other +task: + - global_mmlu_full_sv_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml new file mode 100644 index 00000000..7c4a813e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_social_sciences +task: + - global_mmlu_full_sv_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml new file mode 100644 index 00000000..a6fd88f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_stem +task: + - global_mmlu_full_sv_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml b/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml new file mode 100644 index 00000000..1b9fdea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sv +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml new file mode 100644 index 00000000..8329302f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml new file mode 100644 index 00000000..ac9fa560 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _sv_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml new file mode 100644 index 00000000..096e0e8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml new file mode 100644 index 00000000..ced0b051 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml new file mode 100644 index 00000000..a88871b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml new file mode 100644 index 00000000..c2462c17 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml new file mode 100644 index 00000000..3ae3fecd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml new file mode 100644 index 00000000..a3f00b24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml new file mode 100644 index 00000000..71f613d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml new file mode 100644 index 00000000..46f4c6ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml new file mode 100644 index 00000000..06906bfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml new file mode 100644 index 00000000..1013ef30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml new file mode 100644 index 00000000..a6a752f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml new file mode 100644 index 00000000..547365f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml new file mode 100644 index 00000000..74086a15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml new file mode 100644 index 00000000..8d1f4847 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml new file mode 100644 index 00000000..b78b5846 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml new file mode 100644 index 00000000..dd205629 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml new file mode 100644 index 00000000..fc6ebf2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml new file mode 100644 index 00000000..03773a83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml new file mode 100644 index 00000000..e3db653a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml new file mode 100644 index 00000000..4a087557 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml new file mode 100644 index 00000000..63855384 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml new file mode 100644 index 00000000..7e62f26f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml new file mode 100644 index 00000000..b686a26e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml new file mode 100644 index 00000000..17716538 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml new file mode 100644 index 00000000..e9817c17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml new file mode 100644 index 00000000..61359149 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml new file mode 100644 index 00000000..ce3aa9e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml new file mode 100644 index 00000000..6f705f8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml new file mode 100644 index 00000000..765cdf60 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml new file mode 100644 index 00000000..de7b30b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml new file mode 100644 index 00000000..20969051 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml new file mode 100644 index 00000000..a8bd5fab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml new file mode 100644 index 00000000..7e5ddb57 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml new file mode 100644 index 00000000..ff161d5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml new file mode 100644 index 00000000..f1602c90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml new file mode 100644 index 00000000..6f011063 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml new file mode 100644 index 00000000..7ff7b873 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_management diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml new file mode 100644 index 00000000..c0e669f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml new file mode 100644 index 00000000..83e52445 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml new file mode 100644 index 00000000..f1798792 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_sv_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml new file mode 100644 index 00000000..1f03ac09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml new file mode 100644 index 00000000..fe7f58d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml new file mode 100644 index 00000000..79207a87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml new file mode 100644 index 00000000..ae533079 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml new file mode 100644 index 00000000..1c602c4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml new file mode 100644 index 00000000..ebdef8a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml new file mode 100644 index 00000000..3645c38a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml new file mode 100644 
index 00000000..d40f577d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml new file mode 100644 index 00000000..edf83106 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml new file mode 100644 index 00000000..f897662c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml new file mode 100644 index 00000000..9ad4fb5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sv_social_sciences_tasks +task: 
global_mmlu_full_sv_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml new file mode 100644 index 00000000..4b869606 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml new file mode 100644 index 00000000..522778de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml new file mode 100644 index 00000000..8b3cbc8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_virology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml new file mode 100644 index 00000000..1d7df52b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sv/utils.py b/lm_eval/tasks/global_mmlu/full/sv/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml new file mode 100644 index 00000000..274543cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sw +task: + - global_mmlu_full_sw_stem + - global_mmlu_full_sw_other + - global_mmlu_full_sw_social_sciences + - global_mmlu_full_sw_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml new file mode 100644 index 00000000..02168dff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_humanities +task: + - global_mmlu_full_sw_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml new file mode 100644 index 00000000..9fa28a16 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_other +task: + - global_mmlu_full_sw_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml new file mode 100644 index 00000000..ad318442 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_social_sciences +task: + - global_mmlu_full_sw_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml new file mode 100644 index 00000000..6f23cae8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_stem +task: + - global_mmlu_full_sw_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml b/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml new file mode 100644 index 00000000..58cf5322 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sw +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml new file mode 100644 index 00000000..187229fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml new file mode 100644 index 00000000..3d0d4c5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _sw_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml new file mode 100644 index 00000000..0639b390 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml new file mode 100644 index 00000000..a729c9da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml new file mode 100644 index 00000000..c6b83623 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml new file mode 100644 index 00000000..1856b934 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml new file mode 100644 index 00000000..5ad547ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml new file mode 100644 index 00000000..ff8d8741 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml new file mode 100644 index 00000000..02f53a4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml new file mode 100644 index 00000000..b9f4cc6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml new file mode 100644 index 00000000..bcca5b3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml new file mode 100644 index 00000000..434d2faa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml new file mode 100644 index 00000000..2c1c9d41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml new file mode 100644 index 00000000..2a907de6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml new file mode 100644 index 00000000..1ae86a7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml new file mode 100644 index 00000000..05871f25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml new file mode 100644 index 00000000..8d0de407 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml new file mode 100644 index 00000000..29bec055 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml new file mode 100644 index 00000000..2e49866a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml new file mode 100644 index 00000000..a7adbd97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml new file mode 100644 index 00000000..2e65ab5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml new file mode 100644 index 00000000..7352ad72 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml new file mode 100644 index 00000000..797932ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml new file mode 100644 index 00000000..602d71ff --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml new file mode 100644 index 00000000..a91dd829 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml new file mode 100644 index 00000000..c19b28da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml new file mode 100644 index 00000000..7a9c63bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml new file mode 100644 index 00000000..239eac65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml new file mode 100644 index 00000000..b4f19d84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml new file mode 100644 index 00000000..5725af63 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml new file mode 100644 index 00000000..1d080340 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml new file mode 100644 index 00000000..cfe5a9e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml new file mode 100644 index 00000000..ba20e932 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml new file mode 100644 index 00000000..4609bea0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml new file mode 100644 index 00000000..bbf616b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml new file mode 100644 index 00000000..6781f2d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml new file mode 100644 index 00000000..1f862917 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml new file mode 100644 index 00000000..9eb51cfb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml new file mode 100644 index 00000000..5b0e9e67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_management diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml new file mode 100644 index 00000000..fb65e87e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml new file mode 100644 index 00000000..10d4db0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml new file mode 100644 index 00000000..b337d0ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_sw_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml new file mode 100644 index 00000000..f44bfa0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml new file mode 100644 index 00000000..eabd5a91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml new file mode 100644 index 00000000..41c64458 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml new file mode 100644 index 00000000..96edac99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml new file mode 100644 index 00000000..db94a2ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml new file mode 100644 index 00000000..7cd19d35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml new file mode 100644 index 00000000..9434ae4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml new file mode 100644 
index 00000000..cf35b9c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml new file mode 100644 index 00000000..7570e288 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml new file mode 100644 index 00000000..54c094db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml new file mode 100644 index 00000000..c8d5a42c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sw_social_sciences_tasks +task: 
global_mmlu_full_sw_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml new file mode 100644 index 00000000..79d51a58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml new file mode 100644 index 00000000..523b1572 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml new file mode 100644 index 00000000..43179ff8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_virology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml new file mode 100644 index 00000000..bef7b7f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sw/utils.py b/lm_eval/tasks/global_mmlu/full/sw/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml new file mode 100644 index 00000000..5ef0f7ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_te +task: + - global_mmlu_full_te_stem + - global_mmlu_full_te_other + - global_mmlu_full_te_social_sciences + - global_mmlu_full_te_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml new file mode 100644 index 00000000..7a3c479e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_humanities +task: + - global_mmlu_full_te_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml new file mode 100644 index 00000000..2932844a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_other +task: + - global_mmlu_full_te_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml new file mode 100644 index 00000000..25e721db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_social_sciences +task: + - global_mmlu_full_te_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml new file mode 100644 index 00000000..fe2426ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_stem +task: + - global_mmlu_full_te_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml b/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml new file mode 100644 index 00000000..d7b1190d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: te +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml new file mode 100644 index 00000000..e922fd08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml new file mode 100644 index 00000000..00582018 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _te_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml new file mode 100644 index 00000000..5bc5e76e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml new file mode 100644 index 00000000..7b440102 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml new file mode 100644 index 00000000..90e56184 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml new file mode 100644 index 00000000..0f036e60 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml new file mode 100644 index 00000000..ccdb849a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml new file mode 100644 index 00000000..f11e5657 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml new file mode 100644 index 00000000..c5022ce2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml new file mode 100644 index 00000000..bd5219f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml new file mode 100644 index 00000000..88dad05a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml new file mode 100644 index 00000000..0e8f37fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml new file mode 100644 index 00000000..f0527625 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml new file mode 100644 index 00000000..cf008a67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml new file mode 100644 index 00000000..97169e93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml new file mode 100644 index 00000000..f3edc896 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml new file mode 100644 index 00000000..d4c182d1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml new file mode 100644 index 00000000..53b52f4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml new file mode 100644 index 00000000..5f02170f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml new file mode 100644 index 00000000..c77d30aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml new file mode 100644 index 00000000..7f388a06 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml new file mode 100644 index 00000000..75d54d72 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml new file mode 100644 index 00000000..383596ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml new file mode 100644 index 00000000..8db56a85 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml new file mode 100644 index 00000000..bd471b8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml new file mode 100644 index 00000000..58f577ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml new file mode 100644 index 00000000..400a3805 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml new file mode 100644 index 00000000..694ddc30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml new file mode 100644 index 00000000..b900af19 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml new file mode 100644 index 00000000..3492e724 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml new file mode 100644 index 00000000..48a2d75a --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml new file mode 100644 index 00000000..7e95f7ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml new file mode 100644 index 00000000..dc44c1b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml new file mode 100644 index 00000000..d7631419 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml new file mode 100644 index 00000000..0c2c7862 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_international_law diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml new file mode 100644 index 00000000..718cd9fa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml new file mode 100644 index 00000000..7bb9170c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml new file mode 100644 index 00000000..12355538 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml new file mode 100644 index 00000000..f092416f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_management diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml new file mode 100644 index 00000000..15b84b46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_marketing diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml new file mode 100644 index 00000000..8f0730be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml new file mode 100644 index 00000000..53487f55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_te_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml new file mode 100644 index 00000000..fca8df9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml new file mode 100644 index 00000000..d87f6b02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml new file mode 100644 index 00000000..9348a76e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml new file mode 100644 index 00000000..c8efe8d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml new file mode 100644 index 00000000..b702542e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml new file mode 100644 index 00000000..045b6e1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml new file mode 100644 index 00000000..5e5fa308 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml new file mode 100644 
index 00000000..d4ede33f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml new file mode 100644 index 00000000..cb1906d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml new file mode 100644 index 00000000..1ac09ce0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml new file mode 100644 index 00000000..bbb7bc7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_te_social_sciences_tasks +task: 
global_mmlu_full_te_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml new file mode 100644 index 00000000..e080e082 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_sociology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml new file mode 100644 index 00000000..338f0809 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml new file mode 100644 index 00000000..1f5e38a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_virology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml new file mode 100644 index 00000000..4da26e3e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/te/utils.py b/lm_eval/tasks/global_mmlu/full/te/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml new file mode 100644 index 00000000..8cd3d3f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_tr +task: + - global_mmlu_full_tr_stem + - global_mmlu_full_tr_other + - global_mmlu_full_tr_social_sciences + - global_mmlu_full_tr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml new file mode 100644 index 00000000..f4dade15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_humanities +task: + - global_mmlu_full_tr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml new file mode 100644 index 00000000..e80a5b9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_other +task: + - global_mmlu_full_tr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml new file mode 100644 index 00000000..56fc20e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_social_sciences +task: + - global_mmlu_full_tr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml new file mode 100644 index 00000000..51f9bb3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_stem +task: + - global_mmlu_full_tr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml b/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml new file mode 100644 index 00000000..e322bee6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: tr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml new file mode 100644 index 00000000..1e821573 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml new file mode 100644 index 00000000..44440225 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _tr_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml new file mode 100644 index 00000000..e85390bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml new file mode 100644 index 00000000..4b1afc9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml new file mode 100644 index 00000000..bdfa69e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml new file mode 100644 index 00000000..df43a67c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml new file mode 100644 index 00000000..af2b8b3e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml new file mode 100644 index 00000000..622854f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml new file mode 100644 index 00000000..902bd9c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml new file mode 100644 index 00000000..6b44d0d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml new file mode 100644 index 00000000..27540d97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml new file mode 100644 index 00000000..dbcabeed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml new file mode 100644 index 00000000..628a4fcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml new file mode 100644 index 00000000..6feb236f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml new file mode 100644 index 00000000..9a2a8665 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml new file mode 100644 index 00000000..ffc6dee7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml new file mode 100644 index 00000000..77c189a0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml new file mode 100644 index 00000000..a756d102 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml new file mode 100644 index 00000000..51e7dd9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml new file mode 100644 index 00000000..077476ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml new file mode 100644 index 00000000..cb60e042 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml new file mode 100644 index 00000000..2b989e05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml new file mode 100644 index 00000000..8a0c4d90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2a585f02 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f88e9831 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml new file mode 100644 index 00000000..e880b0b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml new file mode 100644 index 00000000..5527bed2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml new file mode 100644 index 00000000..da93a96e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml new file mode 100644 index 00000000..a28e110c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml new file mode 100644 index 00000000..93871dcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml new file mode 100644 index 00000000..507a4d5c --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml new file mode 100644 index 00000000..60cc713e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml new file mode 100644 index 00000000..8e48bf12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml new file mode 100644 index 00000000..84a95850 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml new file mode 100644 index 00000000..d0dc429f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml new file mode 100644 index 00000000..ea3b7a51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml new file mode 100644 index 00000000..cd61d7d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml new file mode 100644 index 00000000..b0e785c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml new file mode 100644 index 00000000..5ce0d753 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_management diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml new file mode 100644 index 00000000..8ffd4986 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml new file mode 100644 index 00000000..43814b40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml new file mode 100644 index 00000000..e21cfcf6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_tr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml new file mode 100644 index 00000000..88fbfbe2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml new file mode 100644 index 00000000..9f92f855 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml new file mode 100644 index 00000000..31b39c38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml new file mode 100644 index 00000000..283a2b89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml new file mode 100644 index 00000000..e4c17014 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml new file mode 100644 index 00000000..c69f14f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml new file mode 100644 index 00000000..8f5e97c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml new file mode 100644 
index 00000000..00a5f32a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml new file mode 100644 index 00000000..c8571bdb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml new file mode 100644 index 00000000..539f8da6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml new file mode 100644 index 00000000..4203e365 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_tr_social_sciences_tasks +task: 
global_mmlu_full_tr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml new file mode 100644 index 00000000..9cf6352c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_sociology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml new file mode 100644 index 00000000..b86a699b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml new file mode 100644 index 00000000..001cbb28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_virology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml new file mode 100644 index 00000000..1f1d4e4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/tr/utils.py b/lm_eval/tasks/global_mmlu/full/tr/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml new file mode 100644 index 00000000..e880be32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_uk +task: + - global_mmlu_full_uk_stem + - global_mmlu_full_uk_other + - global_mmlu_full_uk_social_sciences + - global_mmlu_full_uk_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml new file mode 100644 index 00000000..b3ec01db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_humanities +task: + - global_mmlu_full_uk_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml new file mode 100644 index 00000000..176b1861 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_other +task: + - global_mmlu_full_uk_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml new file mode 100644 index 00000000..66b36a60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_social_sciences +task: + - global_mmlu_full_uk_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml new file mode 100644 index 00000000..4deba657 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_stem +task: + - global_mmlu_full_uk_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml b/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml new file mode 100644 index 00000000..5765ce13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: uk +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml new file mode 100644 index 00000000..ce37c715 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml new file mode 100644 index 00000000..db1433d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _uk_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml new file mode 100644 index 00000000..6b123ece --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml new file mode 100644 index 00000000..775d2f2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml new file mode 100644 index 00000000..5f71076d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml new file mode 100644 index 00000000..92342ac7 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml new file mode 100644 index 00000000..71384a8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml new file mode 100644 index 00000000..6013afe1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml new file mode 100644 index 00000000..27b60491 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml new file mode 100644 index 00000000..87131c25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml new file mode 100644 index 00000000..93109632 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml new file mode 100644 index 00000000..0f11fcce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml new file mode 100644 index 00000000..7ff9715a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml new file mode 100644 index 00000000..ba92e4b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml new file mode 100644 index 00000000..3a1c86ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml new file mode 100644 index 00000000..7d80cce7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml new file mode 100644 index 00000000..9f8a4091 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml new file mode 100644 index 00000000..ebd6c2da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml new file mode 100644 index 00000000..a8b0cf3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml new file mode 100644 index 00000000..010dbec3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml new file mode 100644 index 00000000..9a270144 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml new file mode 100644 index 00000000..52e80017 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml new file mode 100644 index 00000000..4f41dd3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml new file mode 100644 index 00000000..72c589ef --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml new file mode 100644 index 00000000..e70675d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml new file mode 100644 index 00000000..e29c558e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml new file mode 100644 index 00000000..6b735495 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml new file mode 100644 index 00000000..69a03c06 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml new file mode 100644 index 00000000..9b02711c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml new file mode 100644 index 00000000..60cc0cdd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml new file mode 100644 index 00000000..b62244eb --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml new file mode 100644 index 00000000..57667edc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml new file mode 100644 index 00000000..02804890 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml new file mode 100644 index 00000000..37382bab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml new file mode 100644 index 00000000..d1b046d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_international_law diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml new file mode 100644 index 00000000..12b9da52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml new file mode 100644 index 00000000..abb2de2a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml new file mode 100644 index 00000000..7a1a6f34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml new file mode 100644 index 00000000..ec4cb17d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_management diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml new file mode 100644 index 00000000..afbdaee2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_marketing diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml new file mode 100644 index 00000000..bc1fe1bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml new file mode 100644 index 00000000..8f3b18f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_uk_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml new file mode 100644 index 00000000..34b54e34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml new file mode 100644 index 00000000..38706977 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml new file mode 100644 index 00000000..9f9dd1fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml new file mode 100644 index 00000000..4e981008 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml new file mode 100644 index 00000000..08e3c2af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml new file mode 100644 index 00000000..dc02a7b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml new file mode 100644 index 00000000..7090a6e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml new file mode 100644 
index 00000000..0b43dcfb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml new file mode 100644 index 00000000..b279a94c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml new file mode 100644 index 00000000..3b45dc62 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml new file mode 100644 index 00000000..4ea308da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_uk_social_sciences_tasks +task: 
global_mmlu_full_uk_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml new file mode 100644 index 00000000..a7aa08ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_sociology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml new file mode 100644 index 00000000..d089e778 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml new file mode 100644 index 00000000..41b627f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_virology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml new file mode 100644 index 00000000..f5d6d415 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/uk/utils.py b/lm_eval/tasks/global_mmlu/full/uk/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml new file mode 100644 index 00000000..d6413b35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_vi +task: + - global_mmlu_full_vi_stem + - global_mmlu_full_vi_other + - global_mmlu_full_vi_social_sciences + - global_mmlu_full_vi_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml new file mode 100644 index 00000000..7a05acca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_humanities +task: + - global_mmlu_full_vi_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml new file mode 100644 index 00000000..880bab9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_other +task: + - global_mmlu_full_vi_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml new file mode 100644 index 00000000..6da224f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_social_sciences +task: + - global_mmlu_full_vi_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml new file mode 100644 index 00000000..12526ce7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_stem +task: + - global_mmlu_full_vi_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml b/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml new file mode 100644 index 00000000..5a0ca817 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: vi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml new file mode 100644 index 00000000..47dc80ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml new file mode 100644 index 00000000..d29cb583 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _vi_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml new file mode 100644 index 00000000..3e3ba1dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml new file mode 100644 index 00000000..3afecdc1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml new file mode 100644 index 00000000..34a90a8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml new file mode 100644 index 00000000..63a4c772 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml new file mode 100644 index 00000000..f7226e02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml new file mode 100644 index 00000000..90a9e0b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml new file mode 100644 index 00000000..a09173d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml new file mode 100644 index 00000000..22dc78bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml new file mode 100644 index 00000000..a6f8dbca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml new file mode 100644 index 00000000..4d4b3d60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml new file mode 100644 index 00000000..6c501d0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml new file mode 100644 index 00000000..d0936b3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml new file mode 100644 index 00000000..3b23387f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml new file mode 100644 index 00000000..9c098266 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml new file mode 100644 index 00000000..21a28bb4 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml new file mode 100644 index 00000000..a912dba1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml new file mode 100644 index 00000000..e334fb1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml new file mode 100644 index 00000000..ba98297e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml new file mode 100644 index 00000000..22e0b00a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml new file mode 100644 index 00000000..06507b7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml new file mode 100644 index 00000000..d6eeec7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2faf2b09 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml new file mode 100644 index 00000000..16ed50b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml new file mode 100644 index 00000000..1cad75ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml new file mode 100644 index 00000000..4499711f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml new file mode 100644 index 00000000..bb92f446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml new file mode 100644 index 00000000..0a12e4de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml new file mode 100644 index 00000000..3ae34e4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml new file mode 100644 index 00000000..9ad96b12 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml new file mode 100644 index 00000000..5df3661c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml new file mode 100644 index 00000000..57820fab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml new file mode 100644 index 00000000..5b53962b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml new file mode 100644 index 00000000..5f81b09e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_international_law diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml new file mode 100644 index 00000000..52ec47d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml new file mode 100644 index 00000000..ed89994d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml new file mode 100644 index 00000000..258bd8c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml new file mode 100644 index 00000000..1bd2f606 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_management diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml new file mode 100644 index 00000000..951a3642 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_marketing diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml new file mode 100644 index 00000000..9d606007 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml new file mode 100644 index 00000000..a0cae1b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_vi_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml new file mode 100644 index 00000000..07987487 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml new file mode 100644 index 00000000..6a852bc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml new file mode 100644 index 00000000..42b198f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml new file mode 100644 index 00000000..a7ffc316 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml new file mode 100644 index 00000000..96349674 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml new file mode 100644 index 00000000..da949e34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml new file mode 100644 index 00000000..81c74535 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml new file mode 100644 
index 00000000..7315b353 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml new file mode 100644 index 00000000..f2eb1652 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml new file mode 100644 index 00000000..12933f08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml new file mode 100644 index 00000000..7e90ba55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_vi_social_sciences_tasks +task: 
global_mmlu_full_vi_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml new file mode 100644 index 00000000..056c757b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_sociology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml new file mode 100644 index 00000000..5bcd95d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml new file mode 100644 index 00000000..775b0cca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_virology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml new file mode 100644 index 00000000..db6ba6e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/vi/utils.py b/lm_eval/tasks/global_mmlu/full/vi/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml new file mode 100644 index 00000000..ba9f2460 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_yo +task: + - global_mmlu_full_yo_stem + - global_mmlu_full_yo_other + - global_mmlu_full_yo_social_sciences + - global_mmlu_full_yo_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml new file mode 100644 index 00000000..4e3b3c11 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_humanities +task: + - global_mmlu_full_yo_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml new file mode 100644 index 00000000..ed81bdfe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_other +task: + - global_mmlu_full_yo_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml new file mode 100644 index 00000000..bab52fa2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_social_sciences +task: + - global_mmlu_full_yo_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml new file mode 100644 index 00000000..3687d569 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_stem +task: + - global_mmlu_full_yo_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml b/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml new file mode 100644 index 00000000..ceefadf5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: yo +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml new file mode 100644 index 00000000..ef817a38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml new file mode 100644 index 00000000..a3bae5d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _yo_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml new file mode 100644 index 00000000..b39aa143 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml new file mode 100644 index 00000000..58832982 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml new file mode 100644 index 00000000..21dcf842 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml new file mode 100644 index 00000000..f3abaf24 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml new file mode 100644 index 00000000..0468634b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml new file mode 100644 index 00000000..df6e5844 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml new file mode 100644 index 00000000..0542a4fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml new file mode 100644 index 00000000..cce0b497 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml new file mode 100644 index 00000000..84ca1413 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml new file mode 100644 index 00000000..001689e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml new file mode 100644 index 00000000..dcff962c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml new file mode 100644 index 00000000..6d055d6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml new file mode 100644 index 00000000..c21f7f02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml new file mode 100644 index 00000000..9b6173f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml new file mode 100644 index 00000000..2ffc9740 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml new file mode 100644 index 00000000..394a143a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml new file mode 100644 index 00000000..f0de1887 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml new file mode 100644 index 00000000..02b16fae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml new file mode 100644 index 00000000..94733faa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml new file mode 100644 index 00000000..6ec4070e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml new file mode 100644 index 00000000..4ab051d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml new file mode 100644 index 00000000..bedf7f20 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml new file mode 100644 index 00000000..cb486709 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml new file mode 100644 index 00000000..cea21a89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml new file mode 100644 index 00000000..a8eae6cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml new file mode 100644 index 00000000..cdaca54f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml new file mode 100644 index 00000000..ef3d7527 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml new file mode 100644 index 00000000..0ec62db0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml new file mode 100644 index 00000000..30c8573c --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml new file mode 100644 index 00000000..52f91d43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml new file mode 100644 index 00000000..4ab0ec2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml new file mode 100644 index 00000000..f510c2d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml new file mode 100644 index 00000000..9b657110 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_international_law diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml new file mode 100644 index 00000000..e3ac0a52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml new file mode 100644 index 00000000..a7a9e718 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml new file mode 100644 index 00000000..4a61d3ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml new file mode 100644 index 00000000..92b0b526 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_management diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml new file mode 100644 index 00000000..74c17559 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_marketing diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml new file mode 100644 index 00000000..cfc2c8cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml new file mode 100644 index 00000000..ad12bde6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_yo_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml new file mode 100644 index 00000000..2e85331c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml new file mode 100644 index 00000000..9a6a6fc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml new file mode 100644 index 00000000..62d9ae7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml new file mode 100644 index 00000000..de42ec7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml new file mode 100644 index 00000000..e2ad3236 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml new file mode 100644 index 00000000..198f227b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml new file mode 100644 index 00000000..e5942f74 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml new file mode 100644 
index 00000000..efd4ab7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml new file mode 100644 index 00000000..e1956c87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml new file mode 100644 index 00000000..5c6c2b8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml new file mode 100644 index 00000000..a12c4abd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_yo_social_sciences_tasks +task: 
global_mmlu_full_yo_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml new file mode 100644 index 00000000..e5747900 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_sociology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml new file mode 100644 index 00000000..493dda39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml new file mode 100644 index 00000000..420b1b01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_virology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml new file mode 100644 index 00000000..c0964b30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/yo/utils.py b/lm_eval/tasks/global_mmlu/full/yo/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml new file mode 100644 index 00000000..098ec097 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_zh +task: + - global_mmlu_full_zh_stem + - global_mmlu_full_zh_other + - global_mmlu_full_zh_social_sciences + - global_mmlu_full_zh_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml new file mode 100644 index 00000000..fb347da8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_humanities +task: + - global_mmlu_full_zh_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml new file mode 100644 index 00000000..98d4ed5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_other +task: + - global_mmlu_full_zh_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml new file mode 100644 index 00000000..235012e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_social_sciences +task: + - global_mmlu_full_zh_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml new file mode 100644 index 00000000..660486a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_stem +task: + - global_mmlu_full_zh_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml b/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml new file mode 100644 index 00000000..2c83d495 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: zh +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml new file mode 100644 index 00000000..42ea6276 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml new file mode 100644 index 00000000..45001d14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _zh_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml new file mode 100644 index 00000000..37183dc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml new file mode 100644 index 00000000..bbb5ea38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml new file mode 100644 index 00000000..d90ee0ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml new file mode 100644 index 00000000..ba2031fe --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml new file mode 100644 index 00000000..860761b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml new file mode 100644 index 00000000..53d01965 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml new file mode 100644 index 00000000..dbd2e4be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml new file mode 100644 index 00000000..523d6b30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml new file mode 100644 index 00000000..0a08214f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml new file mode 100644 index 00000000..99332b35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml new file mode 100644 index 00000000..b042cc8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml new file mode 100644 index 00000000..bf920112 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml new file mode 100644 index 00000000..b30acad7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml new file mode 100644 index 00000000..3b108c42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml new file mode 100644 index 00000000..64775599 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml new file mode 100644 index 00000000..07d390aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml new file mode 100644 index 00000000..28b2bdaa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml new file mode 100644 index 00000000..4d084034 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml new file mode 100644 index 00000000..6232ef60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml new file mode 100644 index 00000000..70e3e52b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml new file mode 100644 index 00000000..fe6cb913 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml new file mode 100644 index 00000000..cfa7213a --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml new file mode 100644 index 00000000..ca0b7ad8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml new file mode 100644 index 00000000..38868e96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml new file mode 100644 index 00000000..b79237d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml new file mode 100644 index 00000000..6355da2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml new file mode 100644 index 00000000..f2238867 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml new file mode 100644 index 00000000..9aac2097 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml new file mode 100644 index 00000000..47d8355f --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml new file mode 100644 index 00000000..c1f6671f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml new file mode 100644 index 00000000..d6941ff7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml new file mode 100644 index 00000000..ee228b22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml new file mode 100644 index 00000000..07b1ebd1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_international_law diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml new file mode 100644 index 00000000..ab10ffac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml new file mode 100644 index 00000000..451260b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml new file mode 100644 index 00000000..508d14f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml new file mode 100644 index 00000000..9db0b32b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_management diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml new file mode 100644 index 00000000..a7142ce4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_marketing diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml new file mode 100644 index 00000000..22053090 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml new file mode 100644 index 00000000..5b479c9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_zh_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml new file mode 100644 index 00000000..58d13a99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml new file mode 100644 index 00000000..95d91dfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml new file mode 100644 index 00000000..57452a39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml new file mode 100644 index 00000000..20e237b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml new file mode 100644 index 00000000..56358fe7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml new file mode 100644 index 00000000..630681ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml new file mode 100644 index 00000000..e48f35cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml new file mode 100644 
index 00000000..f75432cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml new file mode 100644 index 00000000..fbbf45ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml new file mode 100644 index 00000000..f760d2a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml new file mode 100644 index 00000000..1dafaf5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_zh_social_sciences_tasks +task: 
global_mmlu_full_zh_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml new file mode 100644 index 00000000..549f4ef1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_sociology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml new file mode 100644 index 00000000..597dcfa1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml new file mode 100644 index 00000000..1984c6b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_virology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml new file mode 100644 index 00000000..fa15c0cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/zh/utils.py b/lm_eval/tasks/global_mmlu/full/zh/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) -- GitLab From 
ff2c49ff2b5fae3cdc1bb5fac4f9d8c9b02694b7 Mon Sep 17 00:00:00 2001 From: Gyouk Chu <94156717+GyoukChu@users.noreply.github.com> Date: Tue, 21 Jan 2025 06:05:00 +0900 Subject: [PATCH 06/19] Update KorMedMCQA: ver 2.0 (#2540) * Update KorMedMCQA: ver 2.0 * Fix pre-commit formatting issues * Update KorMedMCQA v2.0 * pre-commit --- lm_eval/tasks/kormedmcqa/README.md | 9 ++++--- lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml | 3 ++- ...{kormedmcqa_doctor.yaml => _template_yaml} | 17 ++++++++---- lm_eval/tasks/kormedmcqa/dentist.yaml | 3 +++ lm_eval/tasks/kormedmcqa/doctor.yaml | 3 +++ .../tasks/kormedmcqa/kormedmcqa_nurse.yaml | 26 ------------------- .../tasks/kormedmcqa/kormedmcqa_pharm.yaml | 26 ------------------- lm_eval/tasks/kormedmcqa/nurse.yaml | 3 +++ lm_eval/tasks/kormedmcqa/pharm.yaml | 3 +++ 9 files changed, 31 insertions(+), 62 deletions(-) rename lm_eval/tasks/kormedmcqa/{kormedmcqa_doctor.yaml => _template_yaml} (62%) create mode 100644 lm_eval/tasks/kormedmcqa/dentist.yaml create mode 100644 lm_eval/tasks/kormedmcqa/doctor.yaml delete mode 100644 lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml delete mode 100644 lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml create mode 100644 lm_eval/tasks/kormedmcqa/nurse.yaml create mode 100644 lm_eval/tasks/kormedmcqa/pharm.yaml diff --git a/lm_eval/tasks/kormedmcqa/README.md b/lm_eval/tasks/kormedmcqa/README.md index b4eb1134..54a666a1 100644 --- a/lm_eval/tasks/kormedmcqa/README.md +++ b/lm_eval/tasks/kormedmcqa/README.md @@ -25,20 +25,21 @@ Homepage: https://huggingface.co/datasets/sean0042/KorMedMCQA ### Groups and Tasks -* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, and `kormedmcqa_pharm`. +* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, `kormedmcqa_pharm`, and `kormedmcqa_dentist`. 
#### Tasks * `kormedmcqa_doctor`: `Official Korean Doctor Examination` * `kormedmcqa_nurse`: `Official Korean Nurse Examination` * `kormedmcqa_pharm`: `Official Korean Pharmacist Examination` +* `kormedmcqa_dentist`: `Official Korean Dentist Examination` ### Checklist For adding novel benchmarks/datasets to the library: -* [x] Is the task an existing benchmark in the literature? - * [x] Have you referenced the original paper that introduced the task? - * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? +* [ ] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? If other tasks on this dataset are already supported: diff --git a/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml b/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml index d6548334..cac2329e 100644 --- a/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml +++ b/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml @@ -3,9 +3,10 @@ task: - kormedmcqa_doctor - kormedmcqa_nurse - kormedmcqa_pharm + - kormedmcqa_dentist aggregate_metric_list: - metric: exact_match aggregation: mean weight_by_size: true metadata: - version: 0.0 + version: 2.0 diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml b/lm_eval/tasks/kormedmcqa/_template_yaml similarity index 62% rename from lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml rename to lm_eval/tasks/kormedmcqa/_template_yaml index d130dbe8..1dae2062 100644 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml +++ b/lm_eval/tasks/kormedmcqa/_template_yaml @@ -1,10 +1,10 @@ -task : kormedmcqa_doctor dataset_path : sean0042/KorMedMCQA -dataset_name : doctor test_split : test -fewshot_split : dev +fewshot_split : fewshot fewshot_config: 
sampler: first_n + doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답: {{['A', 'B', 'C', 'D', 'E'][answer-1]}}\n\n" + doc_to_target: "" output_type: generate_until doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:" doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" @@ -15,12 +15,19 @@ metric_list: ignore_case: true ignore_punctuation: true regexes_to_ignore: - - " " + - " " + - "\n" generation_kwargs: until: - "Q:" - - "\n\n" - "" + - "<|im_end|>" - "." + - "\n\n" do_sample: false temperature: 0.0 + max_gen_toks: 1024 +metadata: + version: 2.0 +dataset_kwargs: + trust_remote_code: true diff --git a/lm_eval/tasks/kormedmcqa/dentist.yaml b/lm_eval/tasks/kormedmcqa/dentist.yaml new file mode 100644 index 00000000..6a46c771 --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/dentist.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: dentist +task: kormedmcqa_dentist diff --git a/lm_eval/tasks/kormedmcqa/doctor.yaml b/lm_eval/tasks/kormedmcqa/doctor.yaml new file mode 100644 index 00000000..aac30e4c --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/doctor.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: doctor +task: kormedmcqa_doctor diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml b/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml deleted file mode 100644 index 026b6217..00000000 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml +++ /dev/null @@ -1,26 +0,0 @@ -task : kormedmcqa_nurse -dataset_path : sean0042/KorMedMCQA -dataset_name : nurse -test_split : test -fewshot_split : dev -fewshot_config: - sampler: first_n -output_type: generate_until -doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. 
{{E}}\n정답:" -doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" -metric_list: - - metric: exact_match - aggregation: mean - higher_is_better: true - ignore_case: true - ignore_punctuation: true - regexes_to_ignore: - - " " -generation_kwargs: - until: - - "Q:" - - "\n\n" - - "" - - "." - do_sample: false - temperature: 0.0 diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml b/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml deleted file mode 100644 index 91279dd7..00000000 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml +++ /dev/null @@ -1,26 +0,0 @@ -task : kormedmcqa_pharm -dataset_path : sean0042/KorMedMCQA -dataset_name : pharm -test_split : test -fewshot_split : dev -fewshot_config: - sampler: first_n -output_type: generate_until -doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:" -doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" -metric_list: - - metric: exact_match - aggregation: mean - higher_is_better: true - ignore_case: true - ignore_punctuation: true - regexes_to_ignore: - - " " -generation_kwargs: - until: - - "Q:" - - "\n\n" - - "" - - "." 
- do_sample: false - temperature: 0.0 diff --git a/lm_eval/tasks/kormedmcqa/nurse.yaml b/lm_eval/tasks/kormedmcqa/nurse.yaml new file mode 100644 index 00000000..95894a5d --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/nurse.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: nurse +task: kormedmcqa_nurse diff --git a/lm_eval/tasks/kormedmcqa/pharm.yaml b/lm_eval/tasks/kormedmcqa/pharm.yaml new file mode 100644 index 00000000..8075fae3 --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/pharm.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: pharm +task: kormedmcqa_pharm -- GitLab From 88144079fc949ae58624db7af51beb37119d38c3 Mon Sep 17 00:00:00 2001 From: nike00811 Date: Tue, 21 Jan 2025 05:16:29 +0800 Subject: [PATCH 07/19] fix tmlu tmlu_taiwan_specific_tasks tag (#2420) --- lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml | 2 +- lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml | 2 +- lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml b/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml index 965084c8..a810322e 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml @@ -9,7 +9,7 @@ D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_driving_rule" "task_alias": "driving rule" diff --git a/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml b/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml index 6a1fc7b2..3fa66f65 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml @@ -9,7 +9,7 @@ D. 
{{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_taiwan_tourist_resources" "task_alias": "taiwan tourist resources" diff --git a/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml b/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml index 987c2d7d..55e65c87 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml @@ -9,7 +9,7 @@ D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_teacher_qualification" "task_alias": "teacher qualification" -- GitLab From 12b6eeb5b01cd1fe9da103e59b85e2c06bb82c93 Mon Sep 17 00:00:00 2001 From: "Ramiro R. C." 
Date: Mon, 20 Jan 2025 18:33:42 -0300 Subject: [PATCH 08/19] fixed mmlu generative response extraction (#2503) * fixed mmlu generative response extraction * updated file version | added args to exact_match * fix * fix * pre-commit * fix groups --------- Co-authored-by: Baber --- lm_eval/tasks/arabicmmlu/_generate_configs.py | 82 ++++++++++--------- lm_eval/tasks/mmlu/_generate_configs.py | 1 + .../mmlu/generative/_default_template_yaml | 16 +++- lm_eval/tasks/mmlu/generative/_mmlu.yaml | 20 ++--- 4 files changed, 68 insertions(+), 51 deletions(-) diff --git a/lm_eval/tasks/arabicmmlu/_generate_configs.py b/lm_eval/tasks/arabicmmlu/_generate_configs.py index ea59fe98..5dc627e5 100644 --- a/lm_eval/tasks/arabicmmlu/_generate_configs.py +++ b/lm_eval/tasks/arabicmmlu/_generate_configs.py @@ -13,46 +13,48 @@ from tqdm import tqdm eval_logger = logging.getLogger("lm-eval") -SUBJECTS = {'Islamic Studies': 'humanities', - 'Driving Test': 'other', - 'Natural Science (Middle School)': 'stem', - 'Natural Science (Primary School)': 'stem', - 'History (Primary School)': 'humanities', - 'History (Middle School)': 'humanities', - 'History (High School)': 'humanities', - 'General Knowledge': 'other', - 'General Knowledge (Primary School)': 'other', - 'General Knowledge (Middle School)': 'other', - 'Law (Professional)': 'humanities', - 'Physics (High School)': 'stem', - 'Social Science (Middle School)': 'social_science', - 'Social Science (Primary School)': 'social_science', - 'Management (University)': 'other', - 'Arabic Language (Primary School)': 'language', - 'Arabic Language (Middle School)': 'language', - 'Arabic Language (High School)': 'language', - 'Political Science (University)': 'social_science', - 'Philosophy (High School)': 'humanities', - 'Accounting (University)': 'social_science', - 'Computer Science (University)': 'stem', - 'Computer Science (Middle School)': 'stem', - 'Computer Science (Primary School)': 'stem', - 'Computer Science (High School)': 'stem', - 
'Geography (Primary School)': 'social_science', - 'Geography (Middle School)': 'social_science', - 'Geography (High School)': 'social_science', - 'Math (Primary School)': 'stem', - 'Biology (High School)': 'stem', - 'Economics (University)': 'social_science', - 'Economics (Middle School)': 'social_science', - 'Economics (High School)': 'social_science', - 'Arabic Language (General)': 'language', - 'Arabic Language (Grammar)': 'language', - 'Islamic Studies (High School)': 'humanities', - 'Islamic Studies (Middle School)': 'humanities', - 'Islamic Studies (Primary School)': 'humanities', - 'Civics (Middle School)': 'social_science', - 'Civics (High School)': 'social_science'} +SUBJECTS = { + "Islamic Studies": "humanities", + "Driving Test": "other", + "Natural Science (Middle School)": "stem", + "Natural Science (Primary School)": "stem", + "History (Primary School)": "humanities", + "History (Middle School)": "humanities", + "History (High School)": "humanities", + "General Knowledge": "other", + "General Knowledge (Primary School)": "other", + "General Knowledge (Middle School)": "other", + "Law (Professional)": "humanities", + "Physics (High School)": "stem", + "Social Science (Middle School)": "social_science", + "Social Science (Primary School)": "social_science", + "Management (University)": "other", + "Arabic Language (Primary School)": "language", + "Arabic Language (Middle School)": "language", + "Arabic Language (High School)": "language", + "Political Science (University)": "social_science", + "Philosophy (High School)": "humanities", + "Accounting (University)": "social_science", + "Computer Science (University)": "stem", + "Computer Science (Middle School)": "stem", + "Computer Science (Primary School)": "stem", + "Computer Science (High School)": "stem", + "Geography (Primary School)": "social_science", + "Geography (Middle School)": "social_science", + "Geography (High School)": "social_science", + "Math (Primary School)": "stem", + "Biology (High 
School)": "stem", + "Economics (University)": "social_science", + "Economics (Middle School)": "social_science", + "Economics (High School)": "social_science", + "Arabic Language (General)": "language", + "Arabic Language (Grammar)": "language", + "Islamic Studies (High School)": "humanities", + "Islamic Studies (Middle School)": "humanities", + "Islamic Studies (Primary School)": "humanities", + "Civics (Middle School)": "social_science", + "Civics (High School)": "social_science", +} def parse_args(): diff --git a/lm_eval/tasks/mmlu/_generate_configs.py b/lm_eval/tasks/mmlu/_generate_configs.py index 28b94616..58876d4c 100644 --- a/lm_eval/tasks/mmlu/_generate_configs.py +++ b/lm_eval/tasks/mmlu/_generate_configs.py @@ -1,3 +1,4 @@ +# noqa """ Take in a YAML, and output all "other" splits with this YAML """ diff --git a/lm_eval/tasks/mmlu/generative/_default_template_yaml b/lm_eval/tasks/mmlu/generative/_default_template_yaml index 1452e0f5..7281f0a1 100644 --- a/lm_eval/tasks/mmlu/generative/_default_template_yaml +++ b/lm_eval/tasks/mmlu/generative/_default_template_yaml @@ -14,7 +14,21 @@ metric_list: - metric: exact_match aggregation: mean higher_is_better: true + ignore_punctuation: true + ignore_case: true +filter_list: + - name: get_response + filter: + # Filter everything after the first break line + - function: "regex" + regex_pattern: "^(.*?)(?=\\n|$)" + # Remove leading white spaces + - function: remove_whitespace + # function to ignore right white spaces or line breaks + - function: "regex" + regex_pattern: "^(.*?)\\s*$" + - function: take_first metadata: - version: 2.0 + version: 3.0 dataset_kwargs: trust_remote_code: true diff --git a/lm_eval/tasks/mmlu/generative/_mmlu.yaml b/lm_eval/tasks/mmlu/generative/_mmlu.yaml index 1a63611b..e4f4b5d5 100644 --- a/lm_eval/tasks/mmlu/generative/_mmlu.yaml +++ b/lm_eval/tasks/mmlu/generative/_mmlu.yaml @@ -5,29 +5,29 @@ task: task: - mmlu_stem_generative aggregate_metric_list: - - metric: acc - weight_by_size: 
True + - metric: exact_match + weight_by_size: true - group: other task: - mmlu_other_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true - group: social sciences task: - mmlu_social_sciences_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true - group: humanities task: - mmlu_humanities_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true aggregate_metric_list: - aggregation: mean metric: exact_match - weight_by_size: True + weight_by_size: true metadata: - version: 2 + version: 3 -- GitLab From ed9c6fc8db6076cfc86fd1c660fc54c96578eacb Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Wed, 22 Jan 2025 01:46:54 +0900 Subject: [PATCH 09/19] revise mbpp prompt (#2645) --- lm_eval/tasks/mbpp/mbpp.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lm_eval/tasks/mbpp/mbpp.yaml b/lm_eval/tasks/mbpp/mbpp.yaml index 101f1988..a5b58d90 100644 --- a/lm_eval/tasks/mbpp/mbpp.yaml +++ b/lm_eval/tasks/mbpp/mbpp.yaml @@ -4,9 +4,9 @@ dataset_name: full unsafe_code: true output_type: generate_until test_split: test -doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]" +doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]\n" doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}" -target_delimiter: "\n" +target_delimiter: "" metric_list: - metric: !function utils.pass_at_1 aggregation: mean -- GitLab From b2c090cc971e911c62f6f9a848c20cafb1488ec3 Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Wed, 22 Jan 2025 01:48:22 +0900 
Subject: [PATCH 10/19] aggregate by group (total and categories) (#2643) --- lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml | 3 --- lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml | 11 +++++++++++ .../cot_hard/_kmmlu_cot_hard_applied_science.yaml | 8 ++++++++ .../tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml | 8 ++++++++ .../tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml | 8 ++++++++ .../tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml | 8 ++++++++ .../kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml | 3 ++- .../kmmlu_cot_hard_agricultural_sciences.yaml | 3 ++- ...cot_hard_aviation_engineering_and_maintenance.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_chemical_engineering.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_civil_engineering.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_computer_science.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_education.yaml | 3 ++- .../kmmlu_cot_hard_electrical_engineering.yaml | 3 ++- .../kmmlu_cot_hard_electronics_engineering.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_energy_management.yaml | 3 ++- .../kmmlu_cot_hard_environmental_science.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_food_processing.yaml | 3 ++- ...kmmlu_cot_hard_gas_technology_and_engineering.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_industrial_engineer.yaml | 3 ++- .../kmmlu_cot_hard_information_technology.yaml | 3 ++- ...mlu_cot_hard_interior_architecture_and_design.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml | 3 ++- 
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml | 3 ++- ...mlu_cot_hard_machine_design_and_manufacturing.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_management.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_maritime_engineering.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml | 3 ++- .../kmmlu_cot_hard_materials_engineering.yaml | 3 ++- lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml | 3 ++- .../kmmlu_cot_hard_mechanical_engineering.yaml | 3 ++- .../kmmlu_cot_hard_nondestructive_testing.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml | 3 ++- ...mmlu_cot_hard_political_science_and_sociology.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml | 3 ++- ...u_cot_hard_railway_and_automotive_engineering.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml | 3 ++- .../kmmlu_cot_hard_refrigerating_machinery.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml | 3 ++- ...rd_telecommunications_and_wireless_technology.yaml | 3 ++- lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml | 3 --- lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml | 11 +++++++++++ .../kmmlu/direct/_kmmlu_direct_applied_science.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml | 8 ++++++++ .../tasks/kmmlu/direct/kmmlu_direct_accounting.yaml | 1 + .../direct/kmmlu_direct_agricultural_sciences.yaml | 1 + ...u_direct_aviation_engineering_and_maintenance.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml | 1 + .../direct/kmmlu_direct_chemical_engineering.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml | 1 + .../kmmlu/direct/kmmlu_direct_civil_engineering.yaml | 1 + .../kmmlu/direct/kmmlu_direct_computer_science.yaml | 1 + 
.../tasks/kmmlu/direct/kmmlu_direct_construction.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_economics.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_education.yaml | 1 + .../direct/kmmlu_direct_electrical_engineering.yaml | 1 + .../direct/kmmlu_direct_electronics_engineering.yaml | 1 + .../kmmlu/direct/kmmlu_direct_energy_management.yaml | 1 + .../direct/kmmlu_direct_environmental_science.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml | 1 + .../kmmlu/direct/kmmlu_direct_food_processing.yaml | 1 + .../kmmlu_direct_gas_technology_and_engineering.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml | 1 + .../direct/kmmlu_direct_industrial_engineer.yaml | 1 + .../direct/kmmlu_direct_information_technology.yaml | 1 + ...kmmlu_direct_interior_architecture_and_design.yaml | 1 + .../kmmlu/direct/kmmlu_direct_korean_history.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml | 1 + ...kmmlu_direct_machine_design_and_manufacturing.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_management.yaml | 1 + .../direct/kmmlu_direct_maritime_engineering.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_marketing.yaml | 1 + .../direct/kmmlu_direct_materials_engineering.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml | 1 + .../direct/kmmlu_direct_mechanical_engineering.yaml | 1 + .../direct/kmmlu_direct_nondestructive_testing.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml | 1 + .../kmmlu_direct_political_science_and_sociology.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_psychology.yaml | 1 + .../kmmlu/direct/kmmlu_direct_public_safety.yaml | 1 + ...mlu_direct_railway_and_automotive_engineering.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml | 1 + .../direct/kmmlu_direct_refrigerating_machinery.yaml | 1 + 
.../kmmlu/direct/kmmlu_direct_social_welfare.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml | 1 + ...ct_telecommunications_and_wireless_technology.yaml | 1 + .../tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml | 3 --- .../tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml | 11 +++++++++++ .../_kmmlu_direct_hard_applied_science.yaml | 8 ++++++++ .../kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml | 8 ++++++++ .../kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml | 8 ++++++++ .../kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml | 8 ++++++++ .../direct_hard/kmmlu_direct_hard_accounting.yaml | 3 ++- .../kmmlu_direct_hard_agricultural_sciences.yaml | 3 ++- ...ect_hard_aviation_engineering_and_maintenance.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml | 3 ++- .../kmmlu_direct_hard_chemical_engineering.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_chemistry.yaml | 3 ++- .../kmmlu_direct_hard_civil_engineering.yaml | 3 ++- .../kmmlu_direct_hard_computer_science.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_construction.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_criminal_law.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_economics.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_education.yaml | 3 ++- .../kmmlu_direct_hard_electrical_engineering.yaml | 3 ++- .../kmmlu_direct_hard_electronics_engineering.yaml | 3 ++- .../kmmlu_direct_hard_energy_management.yaml | 3 ++- .../kmmlu_direct_hard_environmental_science.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml | 3 ++- .../kmmlu_direct_hard_food_processing.yaml | 3 ++- ...lu_direct_hard_gas_technology_and_engineering.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_geomatics.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_health.yaml | 3 ++- .../kmmlu_direct_hard_industrial_engineer.yaml | 3 ++- .../kmmlu_direct_hard_information_technology.yaml | 3 ++- ..._direct_hard_interior_architecture_and_design.yaml | 3 ++- 
.../direct_hard/kmmlu_direct_hard_korean_history.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_law.yaml | 3 ++- ..._direct_hard_machine_design_and_manufacturing.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_management.yaml | 3 ++- .../kmmlu_direct_hard_maritime_engineering.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_marketing.yaml | 3 ++- .../kmmlu_direct_hard_materials_engineering.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_math.yaml | 3 ++- .../kmmlu_direct_hard_mechanical_engineering.yaml | 3 ++- .../kmmlu_direct_hard_nondestructive_testing.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml | 3 ++- ...u_direct_hard_political_science_and_sociology.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_psychology.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_public_safety.yaml | 3 ++- ...irect_hard_railway_and_automotive_engineering.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_real_estate.yaml | 3 ++- .../kmmlu_direct_hard_refrigerating_machinery.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_social_welfare.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml | 3 ++- ...rd_telecommunications_and_wireless_technology.yaml | 3 ++- lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml | 6 ------ lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml | 11 +++++++++++ .../tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml | 1 + .../kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml | 1 + ...mlu_hard_aviation_engineering_and_maintenance.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml | 1 + .../kmmlu/hard/kmmlu_hard_chemical_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml | 1 + .../kmmlu/hard/kmmlu_hard_civil_engineering.yaml | 1 + 
.../tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml | 1 + .../kmmlu/hard/kmmlu_hard_electrical_engineering.yaml | 1 + .../hard/kmmlu_hard_electronics_engineering.yaml | 1 + .../kmmlu/hard/kmmlu_hard_energy_management.yaml | 1 + .../kmmlu/hard/kmmlu_hard_environmental_science.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml | 1 + .../tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml | 1 + .../kmmlu_hard_gas_technology_and_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml | 1 + .../kmmlu/hard/kmmlu_hard_industrial_engineer.yaml | 1 + .../kmmlu/hard/kmmlu_hard_information_technology.yaml | 1 + .../kmmlu_hard_interior_architecture_and_design.yaml | 1 + .../tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml | 1 + .../kmmlu_hard_machine_design_and_manufacturing.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml | 1 + .../kmmlu/hard/kmmlu_hard_maritime_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml | 1 + .../kmmlu/hard/kmmlu_hard_materials_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml | 1 + .../kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml | 1 + .../kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml | 1 + .../kmmlu_hard_political_science_and_sociology.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml | 1 + .../tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml | 1 + ...kmmlu_hard_railway_and_automotive_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml | 1 + 
.../hard/kmmlu_hard_refrigerating_machinery.yaml | 1 + .../tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml | 1 + ...rd_telecommunications_and_wireless_technology.yaml | 1 + 204 files changed, 442 insertions(+), 105 deletions(-) create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml diff --git a/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml b/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml index 163a03df..0c0fadf7 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml +++ 
b/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard_cot dataset_path: HAERAE-HUB/KMMLU-HARD output_type: generate_until validation_split: dev # not meant to be used, only here to silence warnings diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml new file mode 100644 index 00000000..1e459a05 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml @@ -0,0 +1,11 @@ +group: kmmlu_cot_hard +task: + - kmmlu_cot_hard_stem + - kmmlu_cot_hard_other + - kmmlu_cot_hard_applied_science + - kmmlu_cot_hard_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml new file mode 100644 index 00000000..4944cefb --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_applied_science +task: + - kmmlu_cot_hard_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml new file mode 100644 index 00000000..7b30f358 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_humss +task: + - kmmlu_cot_hard_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml new file mode 100644 index 00000000..70329cf4 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_other +task: + - kmmlu_cot_hard_other_tasks +aggregate_metric_list: + - metric: 
exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml new file mode 100644 index 00000000..65d92fe2 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_stem +task: + - kmmlu_cot_hard_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml index bb17436e..0a89dce5 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml @@ -78,4 +78,5 @@ fewshot_config: 당기순이익은 과소 계상됩니다. 왜냐하면 매출원가가 더 높아지면 이익은 줄어들기 때문입니다. , 상품재고액을 과대 계상한 경우 매출원가는 과대 계상되고, 당기순이익은 과소 계상됩니다. '따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_accounting +task: kmmlu_cot_hard_accounting +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml index b100094b..d3ab5734 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml @@ -80,4 +80,5 @@ fewshot_config: 각 선택지를 분석한 결과 (C) 선택지인 '감자의 바이러스 병을 막기 위해 평지에서 채종한다.'가 가장 잘못된 방법으로 보입니다. 이는 감자의 바이러스 병 예방과 평지에서의 채종 사이에 직접적인 연관성이 없기 때문입니다. 따라서, 정답은 (C) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_agricultural_sciences +task: kmmlu_cot_hard_agricultural_sciences +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml index f9cd217f..dcc59f88 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml @@ -85,4 +85,5 @@ fewshot_config: (D) 옆놀이의 안정성 향상을 위해서는 트위스트가 중요한 역할을 합니다. 트위스트는 날개 팁 부분의 각도를 조절하여, 항공기가 고속에서도 안정적으로 비행할 수 있도록 돕습니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_aviation_engineering_and_maintenance +task: kmmlu_cot_hard_aviation_engineering_and_maintenance +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml index 4d6e52b7..52e0c77d 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml @@ -80,4 +80,5 @@ fewshot_config: 없어야 합니다. 이러한 조건을 충족하는 미생물은 절대호산성 미생물입니다. 절대호산성 미생물은 극도로 산성 환경에서만 생존할 수 있으며, 중성 또는 알칼리성 환경에서는 성장할 수 없습니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_biology +task: kmmlu_cot_hard_biology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml index 9b7435d3..49ebe866 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml @@ -87,4 +87,5 @@ fewshot_config: 압력, V는 부피입니다. W = -P1Vln(P2/P1) = -(10×10^5 Pa)(0.05m^3)ln((1×10^5 Pa)/(10×10^5 Pa)) = 0입니다. 따라서, 정답은 (A) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_chemical_engineering +task: kmmlu_cot_hard_chemical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml index d761f5e2..0cfd1dff 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml @@ -76,4 +76,5 @@ fewshot_config: 황산의 분자량은 98g/mol입니다. 황산의 몰 수는 49g ÷ 98g/mol = 0.5mol입니다. 이 수용액의 물 농도는 0.5mol/1L = 0.5M입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_chemistry +task: kmmlu_cot_hard_chemistry +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml index 87d3d22e..13893796 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml @@ -97,4 +97,5 @@ fewshot_config: 것이며, 이 계약은 미국의 근대도시계획 성립기에 지역제의 바탕이 된 제도는 (A) 협약(covenant)이 가장 적절한 선택입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_civil_engineering +task: kmmlu_cot_hard_civil_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml index 463b8e75..f8399409 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml @@ -96,4 +96,5 @@ fewshot_config: 주어진 설명에서 언급된 감사 추적(Auditing)이나 Shadow Password와 같은 부가적인 기능보다는 사용자 간 침범 차단과 사용자별 파일 권한 설정에 초점을 맞춘 것으로 정의됩니다. 따라서, 정답은 (B) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_computer_science +task: kmmlu_cot_hard_computer_science +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml index a277f637..3cfb3e9f 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml @@ -83,4 +83,5 @@ fewshot_config: 압축비가 9입니다. 이를 식에 대입하여 연소실 체적을 계산해 보겠습니다. 행정체적 = 240 압축비 = 9 연소실_체적 = 행정체적 / (압축비 - 1) = 240 / 8 = 30 연소실의 체적은 30cc입니다. 따라서, 정답은 (B) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_construction +task: kmmlu_cot_hard_construction +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml index fa46f0f4..559ff679 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml @@ -106,4 +106,5 @@ fewshot_config: 고려에 따라 변경된 경우에 형법 제1조 제2항이 적용되는 것은 맞지만, 법령의 변경이 있더라도 그것이 반성적 고려에서 비롯된 것이 아니라면 형법 제1조 제2항이 적용되지 않습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_criminal_law +task: kmmlu_cot_hard_criminal_law +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml index 0da8a9a7..0d59eaa7 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml @@ -88,4 +88,5 @@ fewshot_config: 질소는 질소 가스나 암모니아의 형태로 존재합니다. , 위의 분석에 따르면, (B) 옵션의 설명이 잘못되었습니다. 아연과 코발트는 일반적으로 미량원소로 분류되지만, 이 설명에서는 그들을 다량 원소로 잘못 분류하고 있습니다. 따라서, 정답은 (B) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_ecology +task: kmmlu_cot_hard_ecology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml index baaa3bd9..838e0bd5 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml @@ -87,4 +87,5 @@ fewshot_config: - 7,000,000원)은 어떻게 처리되었을까요? 이 금액은 회사의 이익에서 차감되어야 합니다. 이는 이익잉여금, 즉 회사가 벌어들인 이익에서 퇴직금 등을 지급한 후 남은 금액이 감소한다는 의미입니다. 선택지 D는 옳지 않습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_economics +task: kmmlu_cot_hard_economics +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml index 0e68a0b0..899c38a8 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml @@ -81,4 +81,5 @@ fewshot_config: 수 있습니다. 하지만, 평생교육법에는 전국평생학습도시협의회의 구성 및 운영에 필요한 사항은 교육부령으로 정한다는 내용은 없습니다. 이를 통해 선택지 (B)가 평생교육법 상평생학습도시에 대한 설명으로 옳지 않다는 것을 알 수 있습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_education +task: kmmlu_cot_hard_education +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml index 3b5af8b7..f68f2ca7 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml @@ -93,4 +93,5 @@ fewshot_config: Pyrometer)입니다. 이는 흑체 또는 비슷한 조건의 물체로부터 방사되는 가시광선의 강도를 통해 온도를 측정하는 방식을 사용하며, 흑체의 방사율을 1로 설정하여 보정하는 원리를 기반으로 합니다. 따라서, 정답은 (A) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_electrical_engineering +task: kmmlu_cot_hard_electrical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml index 4d132703..ce9ce0e3 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml @@ -79,4 +79,5 @@ fewshot_config: 합니다. 마지막으로 선택지 (D)는 컴퓨터 시스템의 하드웨어 오류를 발견하고 그에 대한 적절한 조치를 한다는 내용입니다. 이 역시 운영체제의 기능으로, 하드웨어 오류를 감지하고 적절한 처리를 하여 시스템의 안정성을 유지하는 역할을 합니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_electronics_engineering +task: kmmlu_cot_hard_electronics_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml index 8dca183a..0c5e18b1 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml @@ -85,4 +85,5 @@ fewshot_config: 요인이 아닙니다. , 태양광발전 모듈의 I-V 특성곡선에서 일사량에 따라 가장 많이 변화하는 것은 전류입니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_energy_management +task: kmmlu_cot_hard_energy_management +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml index d9080b07..47de0dca 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml @@ -82,4 +82,5 @@ fewshot_config: 전통적인 구성요소는 아닙니다. 과정분석은 보다 일반적인 용어로, 다양한 맥락에서 사용될 수 있습니다. (D) 목록분석 (Inventory Analysis): 이 역시 LCA의 핵심 단계 중 하나입니다. 따라서, 정답은 (C) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_environmental_science +task: kmmlu_cot_hard_environmental_science +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml index 983a6590..598aad05 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml @@ -84,4 +84,5 @@ fewshot_config: 수선 등을 포함한 종합적인 서비스를 제공하는 것으로 보입니다. 이는 일반적인 클리닝 서비스와는 차별화된 서비스라고 볼 수 있습니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_fashion +task: kmmlu_cot_hard_fashion +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml index 2d3473f0..3cbec3d8 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml @@ -78,4 +78,5 @@ fewshot_config: 이 품종은 상대적으로 높은 온도에 더 민감하게 반응하며, 일장의 변화에는 덜 민감한 특성을 가지고 있어 한국의 기후 특성에서 효과적으로 성장할 수 있는 조건을 가지고 있습니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_food_processing +task: kmmlu_cot_hard_food_processing +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml index a244b955..49551077 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml @@ -85,4 +85,5 @@ fewshot_config: 이들은 모두 환경에 해롭습니다. 물은 염소 가스의 재해 방지용으로서의 흡수제나 재해제로서 적합하지 않습니다. 따라서, 정답은 (D) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_gas_technology_and_engineering +task: kmmlu_cot_hard_gas_technology_and_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml index cfc4866a..961b20ce 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml @@ -75,4 +75,5 @@ fewshot_config: 공식은 실제 거리의 제곱근에 축척분모를 곱한 값이 측정된 면적이 될 것입니다. 이렇게 보면, 공식이 의미하는 바를 잘 나타내는 것 같습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_geomatics +task: kmmlu_cot_hard_geomatics +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml index d5b47791..29a432e4 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml @@ -85,4 +85,5 @@ fewshot_config: 일반적으로는 사업장에서 자체적으로 실시하는 것이 일반적입니다. , 국민건강증진기금의 사용 범위에는 포함되지 않을 수 있습니다. 따라서, 정답은 (D) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_health +task: kmmlu_cot_hard_health +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml index fc8f3dec..f087d221 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml @@ -82,4 +82,5 @@ fewshot_config: 절삭저항의 대부분을 차지합니다. 이러한 정보를 바탕으로, 주분력이 절삭저항의 대부분을 차지하므로, 탄소강을 가공할 때 가장 큰 절삭저항을 주는 것은 주분력일 것이라고 추론할 수 있습니다. 따라서, 정답은 (D) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_industrial_engineer +task: kmmlu_cot_hard_industrial_engineer +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml index aa557282..3d6fd3ce 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml @@ -80,4 +80,5 @@ fewshot_config: 피어스 B-E형 발진 회로에서는 컬렉터-이미터 간의 임피던스가 유도성일 때 가장 안정한 발진을 지속할 수 있습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_information_technology +task: kmmlu_cot_hard_information_technology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml index 2b3849d7..84a61a70 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml @@ -92,4 +92,5 @@ fewshot_config: 지칭하지 않으며, 실제 설계 및 계획 과정에서는 보통 최소값, 최대값, 또는 목표값과 같이 더 구체적이고 명확한 기준을 바탕으로 최적치수를 결정합니다. 따라서, 정답은 (C) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_interior_architecture_and_design +task: kmmlu_cot_hard_interior_architecture_and_design +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml index 7498080c..18666a70 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml @@ -97,4 +97,5 @@ fewshot_config: 군사 기구로, 흥선대원군은 왕권 강화를 위해 비변사의 기능을 약화시켰습니다. (D) 통상 수교 거부 정책을 추진하였다 - 흥선대원군은 외세의 침략을 막기 위해 통상 수교 거부 정책을 추진하였습니다. 따라서, 정답은 (C) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_korean_history +task: kmmlu_cot_hard_korean_history +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml index 0328a010..ddbd97b6 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml @@ -81,4 +81,5 @@ fewshot_config: 중 하나입니다. (D) 네트워크 취약성으로 발생하는 문제는 물리적 통제절차의 개선으로 해결해야 한다는 것은, 네트워크 보안 문제를 해결하기 위해 물리적인 통제 절차를 개선하는 것입니다. 이는 네트워크 보안을 강화하는 데 매우 중요한 역할을 합니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_law +task: kmmlu_cot_hard_law +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml index 4c6207bb..d1e0d88b 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml @@ -83,4 +83,5 @@ fewshot_config: 선택지는 해칭이 주된 중심선 또는 단면도의 주된 외형선에 대하여 90℃ 기울기로 그린다는 내용인데, 이는 잘못된 내용입니다. 일반적으로 해칭은 45도 기울기로 그려집니다. , 이 선택지는 해칭의 일반적인 원칙을 잘못 설명하고 있습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_machine_design_and_manufacturing +task: kmmlu_cot_hard_machine_design_and_manufacturing +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml index 11628904..435d762f 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml @@ -76,4 +76,5 @@ fewshot_config: 각 부문별로 목표를 정하고 분산된 시스템을 구축하는 것은 물류 시스템의 효율성을 높일 수 있지만, 이는 통합적인 관리가 어려울 수 있습니다. 따라서, 정답은 (B) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_management +task: kmmlu_cot_hard_management +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml index e168371f..bb7103eb 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml @@ -98,4 +98,5 @@ fewshot_config: (D) 아르곤: 아르곤도 불활성 기체로, 지방질에 용해되거나 마취 효과를 나타내지 않습니다. 아르곤은 주로 산업 공정에서 보호 가스로 사용됩니다. 따라서, 정답은 (B) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_maritime_engineering +task: kmmlu_cot_hard_maritime_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml index 240d92a2..971a106b 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml @@ -91,4 +91,5 @@ fewshot_config: 있으며, 상담원이 고객의 반론에 대한 자연스러운 대응력을 갖추면 고객의 불만이나 반대를 효과적으로 처리할 수 있습니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_marketing +task: kmmlu_cot_hard_marketing +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml index 59774a15..4f5867e2 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml @@ -84,4 +84,5 @@ fewshot_config: 구별하는 데 사용될 수 있습니다. 냉간가공은 재결성 온도 이하에서 이루어지며, 열간가공은 재결성 온도 이상에서 이루어집니다. , 냉간가공과 열간가공을 구별하는 기준은 재결성 온도라고 할 수 있습니다. 따라서, 정답은 (C) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_materials_engineering +task: kmmlu_cot_hard_materials_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml index 103bc573..5aa474d2 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml @@ -95,4 +95,5 @@ fewshot_config: + ω2019 입니다. , ω^2017 + ω^2019 = ω + 1 입니다. 주어진 식에 ω + 1을 대입하면 ω + 1 + ω + 1 + 1 + 1이 됩니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_math +task: kmmlu_cot_hard_math +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml index a57d0661..8d99ba72 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml @@ -77,4 +77,5 @@ fewshot_config: 어떤 것일까요? V벨트의 단면 크기는 알파벳이 뒤로 갈수록 커집니다 즉, A형은 B형보다 작고, B형은 C형보다 작으며, 이런 식으로 D형, E형으로 진행됩니다. , 주어진 선택지 중에서 가장 단면이 큰 V벨트는 E형일 것입니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_mechanical_engineering +task: kmmlu_cot_hard_mechanical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml index c7ecea17..656b08ac 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml @@ -91,4 +91,5 @@ fewshot_config: 시험체의 두께 t를 계산하면 다음과 같습니다. t = v / (2f) = 4800 / (2 * 2 * 10^6) = 0.0012m = 1.2mm 따라서, 정답은 (A) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_nondestructive_testing +task: kmmlu_cot_hard_nondestructive_testing +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml index 1e5607a5..30b60825 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml @@ -110,4 +110,5 @@ fewshot_config: 발명에 대해서는 먼저 출원한 자만이 특허를 받을 수 있다고 규정하고 있으므로, 乙은 특허를 받을 수 없습니다. , (D)는 옳은 설명입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_patent +task: kmmlu_cot_hard_patent +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml index 50c159f9..7d8c4e56 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml @@ -88,4 +88,5 @@ fewshot_config: 범위에서도 활용되는 전략입니다. 도시의 이미지를 국제적으로 홍보하고, 외국인 투자자나 관광객을 유치하는 것이 도시마케팅의 일부이기 때문입니다. 도시마케팅의 공간적 범위가 국내로만 한정되어 있다는 것은 잘못된 설명입니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_political_science_and_sociology +task: kmmlu_cot_hard_political_science_and_sociology +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml index f86d14e6..125befe1 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml @@ -95,4 +95,5 @@ fewshot_config: 이러한 분석을 통해 고급 상담자의 특징은 (C) 내담자에게 의도적으로 주의를 기울이고 중요한 정보를 수집하고 인식할 수 있다는 것으로 보입니다. 이는 상담자의 기본적인 역량을 넘어서서 고급 상담자가 갖추어야 할 능력으로 보입니다. 따라서, 정답은 (C) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_psychology +task: kmmlu_cot_hard_psychology +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml index 5cc5c148..5627770b 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml @@ -90,4 +90,5 @@ fewshot_config: 산업안전ᆞ보건과 관련된 그 밖의 사항 , 선택지 중에서 산업안전보건위원회의 심의ᆞ의결을 거치지 않아도 되는 사항은 (B) 안전ᆞ보건과 관련된 안전장치 구입 시의 적격품 여부 확인에 관한 사항입니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_public_safety +task: kmmlu_cot_hard_public_safety +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml index c81e158a..5b8b436f 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml @@ -85,4 +85,5 @@ fewshot_config: 위한 것입니다. (D) 기관의 과냉 및 소음방지를 위해 일정 회전수 이상 시 슬립 발생: 유체 커플링식 냉각 팬은 기관의 과냉 및 소음 방지를 위해 일정 회전수 이상 시 슬립이 발생합니다. 이는 유체 커플링의 특성 때문입니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_railway_and_automotive_engineering +task: kmmlu_cot_hard_railway_and_automotive_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml index 0e7d8100..38df4312 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml @@ -88,4 +88,5 @@ fewshot_config: 따르면 개업공인중개사는 등록한 관할구역 외의 지역에 있는 중개대상물을 중개할 수 있습니다. 이 내용은 잘못된 내용입니다. 따라서, 정답은 (D) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_real_estate +task: kmmlu_cot_hard_real_estate +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml index 7c3984e4..10624f2a 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml @@ -88,4 +88,5 @@ fewshot_config: = 200.15K입니다. 그러므로, W = 1kJ * (300.15K - 200.15K) / 200.15K = 0.5kJ입니다. 따라서, 정답은 (D) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_refrigerating_machinery +task: kmmlu_cot_hard_refrigerating_machinery +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml index d19fb511..64e6fb5a 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml @@ -90,4 +90,5 @@ fewshot_config: 이는 사회복지정책의 본질적인 목표와 원칙을 반영하지 못하고 있습니다. 사회복지정책은 능력이 아닌 필요에 따라 지원을 하는 것이 원칙이며, 이를 통해 사회적 불평등을 해소하고 모든 사람이 기본적인 생활을 유지할 수 있도록 지원하는 것이 목표입니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_social_welfare +task: kmmlu_cot_hard_social_welfare +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml index 937a864e..fbf88067 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml @@ -104,4 +104,5 @@ fewshot_config: 국가의 안전보장 목적의 수행상 긴요하다고 인정하여 수입하는 물품을 의미합니다. 이 또한 국가의 안전보장을 위해 필요한 물품이므로 면세 대상에 해당할 것으로 보입니다. 따라서, 정답은 (A) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_taxation +task: kmmlu_cot_hard_taxation +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml index ca23afc0..54c5aac8 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml @@ -83,4 +83,5 @@ fewshot_config: 증가하면, 전자기파의 세기는 1/r^2배 감소합니다. , 거리가 2배가 되면, 전자기파의 세기는 1/4배가 됩니다. 그리고 전력 밀도는 전기장과 자기장의 제곱에 비례하므로, 거리가 2배가 되면 전력 밀도는 1/4배가 됩니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_telecommunications_and_wireless_technology +task: kmmlu_cot_hard_telecommunications_and_wireless_technology +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml b/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml index a0c8dfdc..1ecb5fba 100644 --- a/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_direct dataset_path: HAERAE-HUB/KMMLU output_type: generate_until test_split: test diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml new file mode 100644 index 00000000..9763d3d4 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml @@ -0,0 +1,11 @@ +group: kmmlu_direct +task: + - kmmlu_direct_stem + - kmmlu_direct_other + - kmmlu_direct_applied_science + - kmmlu_direct_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml new file mode 100644 index 00000000..78937b3f --- /dev/null +++ 
b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_applied_science +task: + - kmmlu_direct_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml new file mode 100644 index 00000000..1c8e4f20 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_humss +task: + - kmmlu_direct_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml new file mode 100644 index 00000000..eb5166ec --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_other +task: + - kmmlu_direct_other_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml new file mode 100644 index 00000000..932cc1e5 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_stem +task: + - kmmlu_direct_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml index d7736e8d..d61a84b8 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: Accounting include: _direct_kmmlu_yaml task: kmmlu_direct_accounting +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml index 5bf1fa4b..a8a2829b 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: Agricultural-Sciences include: _direct_kmmlu_yaml task: kmmlu_direct_agricultural_sciences +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml index a9a62193..d383834f 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: Aviation-Engineering-and-Maintenance include: _direct_kmmlu_yaml task: kmmlu_direct_aviation_engineering_and_maintenance +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml index ebe1765b..aeeb1e52 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml @@ -1,3 +1,4 @@ dataset_name: Biology include: _direct_kmmlu_yaml task: kmmlu_direct_biology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml index e5875bb7..921073d5 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Chemical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_chemical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml index edabfb67..afa5b4b2 
100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: Chemistry include: _direct_kmmlu_yaml task: kmmlu_direct_chemistry +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml index 98ed98dd..b8c5064b 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Civil-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_civil_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml index c546e738..bac82f1f 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: Computer-Science include: _direct_kmmlu_yaml task: kmmlu_direct_computer_science +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml index a0af2a16..8cb9ada9 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml @@ -1,3 +1,4 @@ dataset_name: Construction include: _direct_kmmlu_yaml task: kmmlu_direct_construction +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml index 9dfdfabc..642a88bc 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: Criminal-Law include: _direct_kmmlu_yaml task: kmmlu_direct_criminal_law +tag: 
kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml index 9d182903..dffbb3c4 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: Ecology include: _direct_kmmlu_yaml task: kmmlu_direct_ecology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml index db4d7840..1fc5d2c3 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml @@ -1,3 +1,4 @@ dataset_name: Economics include: _direct_kmmlu_yaml task: kmmlu_direct_economics +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml index 74887e76..dc151c87 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml @@ -1,3 +1,4 @@ dataset_name: Education include: _direct_kmmlu_yaml task: kmmlu_direct_education +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml index 3455d507..208e7b16 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Electrical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_electrical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml index b45aa308..0a61e3d1 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml +++ 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Electronics-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_electronics_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml index b4fb806b..085f4246 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: Energy-Management include: _direct_kmmlu_yaml task: kmmlu_direct_energy_management +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml index 1670ff16..104a4b9e 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: Environmental-Science include: _direct_kmmlu_yaml task: kmmlu_direct_environmental_science +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml index aef8043a..561e565c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: Fashion include: _direct_kmmlu_yaml task: kmmlu_direct_fashion +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml index f49b087f..3050c82a 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: Food-Processing include: _direct_kmmlu_yaml task: kmmlu_direct_food_processing 
+tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml index 00b7021c..708e76d8 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Gas-Technology-and-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_gas_technology_and_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml index 5d8dc70d..0937bcfc 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: Geomatics include: _direct_kmmlu_yaml task: kmmlu_direct_geomatics +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml index 3f0d77eb..70ef5736 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml @@ -1,3 +1,4 @@ dataset_name: Health include: _direct_kmmlu_yaml task: kmmlu_direct_health +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml index 39ea0bcf..14545201 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: Industrial-Engineer include: _direct_kmmlu_yaml task: kmmlu_direct_industrial_engineer +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml index c42e80ed..50fc6e91 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: Information-Technology include: _direct_kmmlu_yaml task: kmmlu_direct_information_technology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml index 842534aa..638de434 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: Interior-Architecture-and-Design include: _direct_kmmlu_yaml task: kmmlu_direct_interior_architecture_and_design +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml index f1aa277a..6d6b20ba 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: Korean-History include: _direct_kmmlu_yaml task: kmmlu_direct_korean_history +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml index 602f8982..29685852 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml @@ -1,3 +1,4 @@ dataset_name: Law include: _direct_kmmlu_yaml task: kmmlu_direct_law +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml index bfb923c2..587d25d0 100644 --- 
a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: Machine-Design-and-Manufacturing include: _direct_kmmlu_yaml task: kmmlu_direct_machine_design_and_manufacturing +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml index 7352a136..aec441bb 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml @@ -1,3 +1,4 @@ dataset_name: Management include: _direct_kmmlu_yaml task: kmmlu_direct_management +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml index fa0c8f31..e7e1f12e 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Maritime-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_maritime_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml index c3b524d8..10dadc00 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: Marketing include: _direct_kmmlu_yaml task: kmmlu_direct_marketing +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml index f04e0975..d0463266 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml @@ -1,3 +1,4 @@ 
dataset_name: Materials-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_materials_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml index 6c5d28af..20d17c01 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml @@ -1,3 +1,4 @@ dataset_name: Math include: _direct_kmmlu_yaml task: kmmlu_direct_math +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml index a253535a..3ddb2796 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Mechanical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_mechanical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml index 3b8dc7e7..3e37bd1c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: Nondestructive-Testing include: _direct_kmmlu_yaml task: kmmlu_direct_nondestructive_testing +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml index 2afff2c3..e829b995 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml @@ -1,3 +1,4 @@ dataset_name: Patent include: _direct_kmmlu_yaml task: kmmlu_direct_patent +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml index 2209abbf..adf6c1b7 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: Political-Science-and-Sociology include: _direct_kmmlu_yaml task: kmmlu_direct_political_science_and_sociology +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml index 140302d0..a8ccfcbd 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: Psychology include: _direct_kmmlu_yaml task: kmmlu_direct_psychology +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml index 5bb16a90..5926a45c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: Public-Safety include: _direct_kmmlu_yaml task: kmmlu_direct_public_safety +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml index 2a13204a..fa92c9fb 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Railway-and-Automotive-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_railway_and_automotive_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml index 
5a5202b6..e8872a53 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: Real-Estate include: _direct_kmmlu_yaml task: kmmlu_direct_real_estate +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml index 44f9e428..73787390 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: Refrigerating-Machinery include: _direct_kmmlu_yaml task: kmmlu_direct_refrigerating_machinery +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml index fa13bdff..52f731fb 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: Social-Welfare include: _direct_kmmlu_yaml task: kmmlu_direct_social_welfare +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml index 69e71d6d..caa0d798 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml @@ -1,3 +1,4 @@ dataset_name: Taxation include: _direct_kmmlu_yaml task: kmmlu_direct_taxation +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml index f4d1fd05..8f98b1d4 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml +++ 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: Telecommunications-and-Wireless-Technology include: _direct_kmmlu_yaml task: kmmlu_direct_telecommunications_and_wireless_technology +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml b/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml index 3cf63592..f5ed0fda 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard_direct dataset_path: HAERAE-HUB/KMMLU-HARD output_type: generate_until test_split: test diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml new file mode 100644 index 00000000..54206cdb --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml @@ -0,0 +1,11 @@ +group: kmmlu_direct_hard +task: + - kmmlu_direct_hard_stem + - kmmlu_direct_hard_other + - kmmlu_direct_hard_applied_science + - kmmlu_direct_hard_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml new file mode 100644 index 00000000..0f70ae13 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_applied_science +task: + - kmmlu_direct_hard_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml new file mode 100644 index 00000000..b28fdd15 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml 
@@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_humss +task: + - kmmlu_direct_hard_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml new file mode 100644 index 00000000..f216caa6 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_other +task: + - kmmlu_direct_hard_other_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml new file mode 100644 index 00000000..026c6b48 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_stem +task: + - kmmlu_direct_hard_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml index ca805e95..d92b933d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: accounting include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_accounting +task: kmmlu_direct_hard_accounting +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml index 73483444..d78427d0 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: 
agricultural_sciences include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_agricultural_sciences +task: kmmlu_direct_hard_agricultural_sciences +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml index 25c91cb6..6713f04d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: aviation_engineering_and_maintenance include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_aviation_engineering_and_maintenance +task: kmmlu_direct_hard_aviation_engineering_and_maintenance +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml index a7bc8417..e98a380f 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml @@ -1,3 +1,4 @@ dataset_name: biology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_biology +task: kmmlu_direct_hard_biology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml index 063974af..b505e317 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: chemical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_chemical_engineering +task: kmmlu_direct_hard_chemical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml index 371db7bf..d805e234 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: chemistry include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_chemistry +task: kmmlu_direct_hard_chemistry +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml index ba2c23b2..30622d50 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: civil_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_civil_engineering +task: kmmlu_direct_hard_civil_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml index 2a388ff4..bc0f5a37 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: computer_science include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_computer_science +task: kmmlu_direct_hard_computer_science +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml index faab391b..e050e106 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml @@ -1,3 +1,4 @@ dataset_name: construction include: _direct_hard_kmmlu_yaml -task: 
kmmlu_hard_direct_construction +task: kmmlu_direct_hard_construction +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml index d2679f1e..3072b6f0 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: criminal_law include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_criminal_law +task: kmmlu_direct_hard_criminal_law +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml index adedf9d6..3129f467 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: ecology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_ecology +task: kmmlu_direct_hard_ecology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml index f42e5b8d..87069840 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml @@ -1,3 +1,4 @@ dataset_name: economics include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_economics +task: kmmlu_direct_hard_economics +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml index 9c90432f..75baa136 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml @@ -1,3 +1,4 @@ dataset_name: education include: _direct_hard_kmmlu_yaml 
-task: kmmlu_hard_direct_education +task: kmmlu_direct_hard_education +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml index 780dad22..789cdfb8 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electrical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_electrical_engineering +task: kmmlu_direct_hard_electrical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml index e0178154..9a1736e0 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electronics_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_electronics_engineering +task: kmmlu_direct_hard_electronics_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml index d4c2ca7d..4653272e 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: energy_management include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_energy_management +task: kmmlu_direct_hard_energy_management +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml index de511a09..60c0253e 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: environmental_science include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_environmental_science +task: kmmlu_direct_hard_environmental_science +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml index 26f0617d..86bbb9b4 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: fashion include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_fashion +task: kmmlu_direct_hard_fashion +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml index e48143d2..6b2817d2 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: food_processing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_food_processing +task: kmmlu_direct_hard_food_processing +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml index eb5211ad..c2d2f477 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: gas_technology_and_engineering include: 
_direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_gas_technology_and_engineering +task: kmmlu_direct_hard_gas_technology_and_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml index a25f3c1a..9dadc72d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: geomatics include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_geomatics +task: kmmlu_direct_hard_geomatics +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml index 0fef809e..f1bf4c77 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml @@ -1,3 +1,4 @@ dataset_name: health include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_health +task: kmmlu_direct_hard_health +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml index d7ca26e5..5f7b73ea 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: industrial_engineer include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_industrial_engineer +task: kmmlu_direct_hard_industrial_engineer +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml index 0f8d01ec..a1c5cf9d 100644 --- 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: information_technology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_information_technology +task: kmmlu_direct_hard_information_technology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml index 3b130381..65a20727 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: interior_architecture_and_design include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_interior_architecture_and_design +task: kmmlu_direct_hard_interior_architecture_and_design +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml index c4d595d1..c10a9f57 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: korean_history include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_korean_history +task: kmmlu_direct_hard_korean_history +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml index 168f0340..96e5514f 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml @@ -1,3 +1,4 @@ dataset_name: law include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_law +task: kmmlu_direct_hard_law +tag: 
kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml index 73665b1b..50dfd63b 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: machine_design_and_manufacturing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_machine_design_and_manufacturing +task: kmmlu_direct_hard_machine_design_and_manufacturing +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml index 6eb945d2..48c339d7 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml @@ -1,3 +1,4 @@ dataset_name: management include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_management +task: kmmlu_direct_hard_management +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml index 4078cf97..937bfd27 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: maritime_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_maritime_engineering +task: kmmlu_direct_hard_maritime_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml index 37d62bb1..1ae4088a 100644 --- 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: marketing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_marketing +task: kmmlu_direct_hard_marketing +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml index c1e2645c..432460eb 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: materials_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_materials_engineering +task: kmmlu_direct_hard_materials_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml index f5f3373a..53d2fca1 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml @@ -1,3 +1,4 @@ dataset_name: math include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_math +task: kmmlu_direct_hard_math +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml index dae55511..1a3994ea 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: mechanical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_mechanical_engineering +task: kmmlu_direct_hard_mechanical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml index 3ff95837..909c502c 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: nondestructive_testing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_nondestructive_testing +task: kmmlu_direct_hard_nondestructive_testing +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml index d913752b..d8faf972 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml @@ -1,3 +1,4 @@ dataset_name: patent include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_patent +task: kmmlu_direct_hard_patent +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml index 8a5d96b6..0b650507 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: political_science_and_sociology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_political_science_and_sociology +task: kmmlu_direct_hard_political_science_and_sociology +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml index 9fbf0d31..b1a6f777 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml +++ 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: psychology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_psychology +task: kmmlu_direct_hard_psychology +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml index b376c4eb..3da46294 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: public_safety include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_public_safety +task: kmmlu_direct_hard_public_safety +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml index 0eb534e5..74e5e02f 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: railway_and_automotive_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_railway_and_automotive_engineering +task: kmmlu_direct_hard_railway_and_automotive_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml index 9c3df599..8f23fae5 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: real_estate include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_real_estate +task: kmmlu_direct_hard_real_estate +tag: kmmlu_direct_hard_other_tasks diff --git 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml index f62e8e95..192a1f2c 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: refrigerating_machinery include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_refrigerating_machinery +task: kmmlu_direct_hard_refrigerating_machinery +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml index ad4dc2cf..c24babc3 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: social_welfare include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_social_welfare +task: kmmlu_direct_hard_social_welfare +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml index 445ab693..17586af6 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml @@ -1,3 +1,4 @@ dataset_name: taxation include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_taxation +task: kmmlu_direct_hard_taxation +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml index 498b2fb2..bed0df91 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml +++ 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: telecommunications_and_wireless_technology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_telecommunications_and_wireless_technology +task: kmmlu_direct_hard_telecommunications_and_wireless_technology +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml b/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml index 26c4105b..b3e69705 100644 --- a/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard dataset_path: HAERAE-HUB/KMMLU-HARD output_type: multiple_choice test_split: test @@ -12,8 +9,5 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true metadata: version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml new file mode 100644 index 00000000..827e74ec --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml @@ -0,0 +1,11 @@ +group: kmmlu_hard +task: + - kmmlu_hard_stem + - kmmlu_hard_other + - kmmlu_hard_applied_science + - kmmlu_hard_humss +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml new file mode 100644 index 00000000..76d383af --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_applied_science +task: + - kmmlu_hard_applied_science_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml new file mode 100644 index 00000000..39eb5a7a --- /dev/null +++ 
b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_humss +task: + - kmmlu_hard_humss_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml new file mode 100644 index 00000000..5759fe88 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_other +task: + - kmmlu_hard_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml new file mode 100644 index 00000000..ee14c726 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_stem +task: + - kmmlu_hard_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml index 8112903b..0c341baa 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: accounting include: _hard_kmmlu_yaml task: kmmlu_hard_accounting +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml index 3a20948b..90d284c8 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: agricultural_sciences include: _hard_kmmlu_yaml task: kmmlu_hard_agricultural_sciences +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml index 87b3845f..5ec90f36 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: aviation_engineering_and_maintenance include: _hard_kmmlu_yaml task: kmmlu_hard_aviation_engineering_and_maintenance +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml index 0a28b7c7..045e17e7 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml @@ -1,3 +1,4 @@ dataset_name: biology include: _hard_kmmlu_yaml task: kmmlu_hard_biology +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml index 8fc448a8..cbfa42eb 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: chemical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_chemical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml index 366c9502..67c65d65 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: chemistry include: _hard_kmmlu_yaml task: kmmlu_hard_chemistry +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml index ba1a15ad..58e3c87a 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: 
civil_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_civil_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml index 4e1f1213..42f91467 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: computer_science include: _hard_kmmlu_yaml task: kmmlu_hard_computer_science +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml index 8331379c..55a5a1d0 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml @@ -1,3 +1,4 @@ dataset_name: construction include: _hard_kmmlu_yaml task: kmmlu_hard_construction +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml index b7acd49a..14e4d5ad 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: criminal_law include: _hard_kmmlu_yaml task: kmmlu_hard_criminal_law +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml index 6542c1ee..c737b1ab 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: ecology include: _hard_kmmlu_yaml task: kmmlu_hard_ecology +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml index 4f1bfba0..9a0084dc 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml @@ -1,3 +1,4 @@ 
dataset_name: economics include: _hard_kmmlu_yaml task: kmmlu_hard_economics +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml index 0f6a6a80..568d094d 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml @@ -1,3 +1,4 @@ dataset_name: education include: _hard_kmmlu_yaml task: kmmlu_hard_education +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml index 51625c1e..ad46c486 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electrical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_electrical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml index 252ecc19..843c92a0 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electronics_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_electronics_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml index 062204f1..dcfe7f36 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: energy_management include: _hard_kmmlu_yaml task: kmmlu_hard_energy_management +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml index d7f32dc5..a0ae1b81 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: environmental_science include: _hard_kmmlu_yaml task: kmmlu_hard_environmental_science +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml index 9448efcf..3ba973ba 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: fashion include: _hard_kmmlu_yaml task: kmmlu_hard_fashion +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml index 138920ef..cd08fe3b 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: food_processing include: _hard_kmmlu_yaml task: kmmlu_hard_food_processing +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml index 14e213b5..fe30680a 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: gas_technology_and_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_gas_technology_and_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml index 0370a7a7..53b52e96 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: 
geomatics include: _hard_kmmlu_yaml task: kmmlu_hard_geomatics +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml index c5e2ba98..dcd2b179 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml @@ -1,3 +1,4 @@ dataset_name: health include: _hard_kmmlu_yaml task: kmmlu_hard_health +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml index d3cbef78..2e8449ff 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: industrial_engineer include: _hard_kmmlu_yaml task: kmmlu_hard_industrial_engineer +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml index 4af23d30..86ded35d 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: information_technology include: _hard_kmmlu_yaml task: kmmlu_hard_information_technology +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml index 76bfe50c..55de2641 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: interior_architecture_and_design include: _hard_kmmlu_yaml task: kmmlu_hard_interior_architecture_and_design +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml index 60ff94e7..4d4152b7 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: korean_history include: _hard_kmmlu_yaml task: kmmlu_hard_korean_history +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml index aeec24dc..0a75d904 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml @@ -1,3 +1,4 @@ dataset_name: law include: _hard_kmmlu_yaml task: kmmlu_hard_law +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml index 222f89ba..210ffd8f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: machine_design_and_manufacturing include: _hard_kmmlu_yaml task: kmmlu_hard_machine_design_and_manufacturing +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml index 8e9e8664..d3f27519 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml @@ -1,3 +1,4 @@ dataset_name: management include: _hard_kmmlu_yaml task: kmmlu_hard_management +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml index e68041d5..dec43bc8 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: maritime_engineering include: 
_hard_kmmlu_yaml task: kmmlu_hard_maritime_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml index 54a62d62..f86cfe17 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: marketing include: _hard_kmmlu_yaml task: kmmlu_hard_marketing +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml index 4582b0f3..684120a0 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: materials_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_materials_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml index e5637176..ed125f90 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml @@ -1,3 +1,4 @@ dataset_name: math include: _hard_kmmlu_yaml task: kmmlu_hard_math +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml index 9b3adca0..b6d00e2e 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: mechanical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_mechanical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml index 21c25fc8..acf3ed9f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml +++ 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: nondestructive_testing include: _hard_kmmlu_yaml task: kmmlu_hard_nondestructive_testing +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml index 3fcdcd96..910f11c5 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml @@ -1,3 +1,4 @@ dataset_name: patent include: _hard_kmmlu_yaml task: kmmlu_hard_patent +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml index 6bb907cb..7b7addfd 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: political_science_and_sociology include: _hard_kmmlu_yaml task: kmmlu_hard_political_science_and_sociology +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml index c79cef1f..a6d8b754 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: psychology include: _hard_kmmlu_yaml task: kmmlu_hard_psychology +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml index 110bd147..8b04b78e 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: public_safety include: _hard_kmmlu_yaml task: kmmlu_hard_public_safety +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml index 31b610f7..358b7e36 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: railway_and_automotive_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_railway_and_automotive_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml index bd1b32c8..9010e2a7 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: real_estate include: _hard_kmmlu_yaml task: kmmlu_hard_real_estate +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml index 8c7dd139..5f03b70b 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: refrigerating_machinery include: _hard_kmmlu_yaml task: kmmlu_hard_refrigerating_machinery +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml index 12502a57..24f105e4 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: social_welfare include: _hard_kmmlu_yaml task: kmmlu_hard_social_welfare +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml index f0f815ab..7d0bbf86 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml @@ 
-1,3 +1,4 @@ dataset_name: taxation include: _hard_kmmlu_yaml task: kmmlu_hard_taxation +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml index 0cb519d1..c1398c5f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: telecommunications_and_wireless_technology include: _hard_kmmlu_yaml task: kmmlu_hard_telecommunications_and_wireless_technology +tag: kmmlu_hard_applied_science_tasks -- GitLab From 370e2f9e5bbe59912644b1b6e052e17be31d6858 Mon Sep 17 00:00:00 2001 From: Jan Kaniecki Date: Tue, 21 Jan 2025 17:55:23 +0100 Subject: [PATCH 11/19] Fix max_tokens handling in vllm_vlms.py (#2637) * Update vllm_vlms.py * pre-commit --------- Co-authored-by: Baber --- lm_eval/models/vllm_vlms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lm_eval/models/vllm_vlms.py b/lm_eval/models/vllm_vlms.py index ab216ab5..a0d72926 100644 --- a/lm_eval/models/vllm_vlms.py +++ b/lm_eval/models/vllm_vlms.py @@ -271,7 +271,9 @@ class VLLM_VLM(VLLM): left_truncate_len=max_ctx_len, ) - cont = self._model_generate(inputs, stop=until, generate=True, **kwargs) + cont = self._model_generate( + inputs, stop=until, generate=True, max_tokens=max_gen_toks, **kwargs + ) for output, context in zip(cont, contexts): generated_text = output.outputs[0].text -- GitLab From 5c006ed417a2f4d01248d487bcbd493ebe3e5edd Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Sat, 25 Jan 2025 01:00:29 +0900 Subject: [PATCH 12/19] separate category for `global_mmlu` (#2652) * separate category * set version 0.0 * apply precommit --- .../global_mmlu/default/_generate_configs.py | 42 ------------------- .../{_default_yaml => ar/_ar_template_yaml} | 3 +- 
.../default/ar/_global_mmlu_ar.yaml | 13 ++++++ .../default/ar/global_mmlu_ar_business.yaml | 4 ++ .../default/ar/global_mmlu_ar_humanities.yaml | 4 ++ .../default/ar/global_mmlu_ar_medical.yaml | 4 ++ .../default/ar/global_mmlu_ar_other.yaml | 4 ++ .../ar/global_mmlu_ar_social_sciences.yaml | 4 ++ .../default/ar/global_mmlu_ar_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ar/utils.py | 18 ++++++++ .../global_mmlu/default/bn/_bn_template_yaml | 16 +++++++ .../default/bn/_global_mmlu_bn.yaml | 13 ++++++ .../default/bn/global_mmlu_bn_business.yaml | 4 ++ .../default/bn/global_mmlu_bn_humanities.yaml | 4 ++ .../default/bn/global_mmlu_bn_medical.yaml | 4 ++ .../default/bn/global_mmlu_bn_other.yaml | 4 ++ .../bn/global_mmlu_bn_social_sciences.yaml | 4 ++ .../default/bn/global_mmlu_bn_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/bn/utils.py | 18 ++++++++ .../global_mmlu/default/de/_de_template_yaml | 16 +++++++ .../default/de/_global_mmlu_de.yaml | 13 ++++++ .../default/de/global_mmlu_de_business.yaml | 4 ++ .../default/de/global_mmlu_de_humanities.yaml | 4 ++ .../default/de/global_mmlu_de_medical.yaml | 4 ++ .../default/de/global_mmlu_de_other.yaml | 4 ++ .../de/global_mmlu_de_social_sciences.yaml | 4 ++ .../default/de/global_mmlu_de_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/de/utils.py | 18 ++++++++ .../global_mmlu/default/en/_en_template_yaml | 16 +++++++ .../default/en/_global_mmlu_en.yaml | 13 ++++++ .../default/en/global_mmlu_en_business.yaml | 4 ++ .../default/en/global_mmlu_en_humanities.yaml | 4 ++ .../default/en/global_mmlu_en_medical.yaml | 4 ++ .../default/en/global_mmlu_en_other.yaml | 4 ++ .../en/global_mmlu_en_social_sciences.yaml | 4 ++ .../default/en/global_mmlu_en_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/en/utils.py | 18 ++++++++ .../global_mmlu/default/es/_es_template_yaml | 16 +++++++ .../default/es/_global_mmlu_es.yaml | 13 ++++++ .../default/es/global_mmlu_es_business.yaml | 4 ++ 
.../default/es/global_mmlu_es_humanities.yaml | 4 ++ .../default/es/global_mmlu_es_medical.yaml | 4 ++ .../default/es/global_mmlu_es_other.yaml | 4 ++ .../es/global_mmlu_es_social_sciences.yaml | 4 ++ .../default/es/global_mmlu_es_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/es/utils.py | 18 ++++++++ .../global_mmlu/default/fr/_fr_template_yaml | 16 +++++++ .../default/fr/_global_mmlu_fr.yaml | 13 ++++++ .../default/fr/global_mmlu_fr_business.yaml | 4 ++ .../default/fr/global_mmlu_fr_humanities.yaml | 4 ++ .../default/fr/global_mmlu_fr_medical.yaml | 4 ++ .../default/fr/global_mmlu_fr_other.yaml | 4 ++ .../fr/global_mmlu_fr_social_sciences.yaml | 4 ++ .../default/fr/global_mmlu_fr_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/fr/utils.py | 18 ++++++++ .../global_mmlu/default/global_mmlu_ar.yaml | 4 -- .../global_mmlu/default/global_mmlu_bn.yaml | 4 -- .../global_mmlu/default/global_mmlu_de.yaml | 4 -- .../global_mmlu/default/global_mmlu_en.yaml | 4 -- .../global_mmlu/default/global_mmlu_es.yaml | 4 -- .../global_mmlu/default/global_mmlu_fr.yaml | 4 -- .../global_mmlu/default/global_mmlu_hi.yaml | 4 -- .../global_mmlu/default/global_mmlu_id.yaml | 4 -- .../global_mmlu/default/global_mmlu_it.yaml | 4 -- .../global_mmlu/default/global_mmlu_ja.yaml | 4 -- .../global_mmlu/default/global_mmlu_ko.yaml | 4 -- .../global_mmlu/default/global_mmlu_pt.yaml | 4 -- .../global_mmlu/default/global_mmlu_sw.yaml | 4 -- .../global_mmlu/default/global_mmlu_yo.yaml | 4 -- .../global_mmlu/default/global_mmlu_zh.yaml | 4 -- .../default/hi/_global_mmlu_hi.yaml | 13 ++++++ .../global_mmlu/default/hi/_hi_template_yaml | 16 +++++++ .../default/hi/global_mmlu_hi_business.yaml | 4 ++ .../default/hi/global_mmlu_hi_humanities.yaml | 4 ++ .../default/hi/global_mmlu_hi_medical.yaml | 4 ++ .../default/hi/global_mmlu_hi_other.yaml | 4 ++ .../hi/global_mmlu_hi_social_sciences.yaml | 4 ++ .../default/hi/global_mmlu_hi_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/hi/utils.py | 18 
++++++++ .../default/id/_global_mmlu_id.yaml | 13 ++++++ .../global_mmlu/default/id/_id_template_yaml | 16 +++++++ .../default/id/global_mmlu_id_business.yaml | 4 ++ .../default/id/global_mmlu_id_humanities.yaml | 4 ++ .../default/id/global_mmlu_id_medical.yaml | 4 ++ .../default/id/global_mmlu_id_other.yaml | 4 ++ .../id/global_mmlu_id_social_sciences.yaml | 4 ++ .../default/id/global_mmlu_id_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/id/utils.py | 18 ++++++++ .../default/it/_global_mmlu_it.yaml | 13 ++++++ .../global_mmlu/default/it/_it_template_yaml | 16 +++++++ .../default/it/global_mmlu_it_business.yaml | 4 ++ .../default/it/global_mmlu_it_humanities.yaml | 4 ++ .../default/it/global_mmlu_it_medical.yaml | 4 ++ .../default/it/global_mmlu_it_other.yaml | 4 ++ .../it/global_mmlu_it_social_sciences.yaml | 4 ++ .../default/it/global_mmlu_it_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/it/utils.py | 18 ++++++++ .../default/ja/_global_mmlu_ja.yaml | 13 ++++++ .../global_mmlu/default/ja/_ja_template_yaml | 16 +++++++ .../default/ja/global_mmlu_ja_business.yaml | 4 ++ .../default/ja/global_mmlu_ja_humanities.yaml | 4 ++ .../default/ja/global_mmlu_ja_medical.yaml | 4 ++ .../default/ja/global_mmlu_ja_other.yaml | 4 ++ .../ja/global_mmlu_ja_social_sciences.yaml | 4 ++ .../default/ja/global_mmlu_ja_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ja/utils.py | 18 ++++++++ .../default/ko/_global_mmlu_ko.yaml | 13 ++++++ .../global_mmlu/default/ko/_ko_template_yaml | 16 +++++++ .../default/ko/global_mmlu_ko_business.yaml | 4 ++ .../default/ko/global_mmlu_ko_humanities.yaml | 4 ++ .../default/ko/global_mmlu_ko_medical.yaml | 4 ++ .../default/ko/global_mmlu_ko_other.yaml | 4 ++ .../ko/global_mmlu_ko_social_sciences.yaml | 4 ++ .../default/ko/global_mmlu_ko_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ko/utils.py | 18 ++++++++ .../default/pt/_global_mmlu_pt.yaml | 13 ++++++ .../global_mmlu/default/pt/_pt_template_yaml | 16 +++++++ 
.../default/pt/global_mmlu_pt_business.yaml | 4 ++ .../default/pt/global_mmlu_pt_humanities.yaml | 4 ++ .../default/pt/global_mmlu_pt_medical.yaml | 4 ++ .../default/pt/global_mmlu_pt_other.yaml | 4 ++ .../pt/global_mmlu_pt_social_sciences.yaml | 4 ++ .../default/pt/global_mmlu_pt_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/pt/utils.py | 18 ++++++++ .../default/sw/_global_mmlu_sw.yaml | 13 ++++++ .../global_mmlu/default/sw/_sw_template_yaml | 16 +++++++ .../default/sw/global_mmlu_sw_business.yaml | 4 ++ .../default/sw/global_mmlu_sw_humanities.yaml | 4 ++ .../default/sw/global_mmlu_sw_medical.yaml | 4 ++ .../default/sw/global_mmlu_sw_other.yaml | 4 ++ .../sw/global_mmlu_sw_social_sciences.yaml | 4 ++ .../default/sw/global_mmlu_sw_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/sw/utils.py | 18 ++++++++ .../default/yo/_global_mmlu_yo.yaml | 13 ++++++ .../global_mmlu/default/yo/_yo_template_yaml | 16 +++++++ .../default/yo/global_mmlu_yo_business.yaml | 4 ++ .../default/yo/global_mmlu_yo_humanities.yaml | 4 ++ .../default/yo/global_mmlu_yo_medical.yaml | 4 ++ .../default/yo/global_mmlu_yo_other.yaml | 4 ++ .../yo/global_mmlu_yo_social_sciences.yaml | 4 ++ .../default/yo/global_mmlu_yo_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/yo/utils.py | 18 ++++++++ .../default/zh/_global_mmlu_zh.yaml | 13 ++++++ .../global_mmlu/default/zh/_zh_template_yaml | 16 +++++++ .../default/zh/global_mmlu_zh_business.yaml | 4 ++ .../default/zh/global_mmlu_zh_humanities.yaml | 4 ++ .../default/zh/global_mmlu_zh_medical.yaml | 4 ++ .../default/zh/global_mmlu_zh_other.yaml | 4 ++ .../zh/global_mmlu_zh_social_sciences.yaml | 4 ++ .../default/zh/global_mmlu_zh_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/zh/utils.py | 18 ++++++++ .../full/am/_global_mmlu_full_am.yaml | 2 +- .../full/ar/_global_mmlu_full_ar.yaml | 2 +- .../full/bn/_global_mmlu_full_bn.yaml | 2 +- .../full/cs/_global_mmlu_full_cs.yaml | 2 +- .../full/de/_global_mmlu_full_de.yaml | 2 +- 
.../full/el/_global_mmlu_full_el.yaml | 2 +- .../full/en/_global_mmlu_full_en.yaml | 2 +- .../full/es/_global_mmlu_full_es.yaml | 2 +- .../full/fa/_global_mmlu_full_fa.yaml | 2 +- .../full/fil/_global_mmlu_full_fil.yaml | 2 +- .../full/fr/_global_mmlu_full_fr.yaml | 2 +- .../full/ha/_global_mmlu_full_ha.yaml | 2 +- .../full/he/_global_mmlu_full_he.yaml | 2 +- .../full/hi/_global_mmlu_full_hi.yaml | 2 +- .../full/id/_global_mmlu_full_id.yaml | 2 +- .../full/ig/_global_mmlu_full_ig.yaml | 2 +- .../full/it/_global_mmlu_full_it.yaml | 2 +- .../full/ja/_global_mmlu_full_ja.yaml | 2 +- .../full/ko/_global_mmlu_full_ko.yaml | 2 +- .../full/ky/_global_mmlu_full_ky.yaml | 2 +- .../full/lt/_global_mmlu_full_lt.yaml | 2 +- .../full/mg/_global_mmlu_full_mg.yaml | 2 +- .../full/ms/_global_mmlu_full_ms.yaml | 2 +- .../full/ne/_global_mmlu_full_ne.yaml | 2 +- .../full/nl/_global_mmlu_full_nl.yaml | 2 +- .../full/ny/_global_mmlu_full_ny.yaml | 2 +- .../full/pl/_global_mmlu_full_pl.yaml | 2 +- .../full/pt/_global_mmlu_full_pt.yaml | 2 +- .../full/ro/_global_mmlu_full_ro.yaml | 2 +- .../full/ru/_global_mmlu_full_ru.yaml | 2 +- .../full/si/_global_mmlu_full_si.yaml | 2 +- .../full/sn/_global_mmlu_full_sn.yaml | 2 +- .../full/so/_global_mmlu_full_so.yaml | 2 +- .../full/sr/_global_mmlu_full_sr.yaml | 2 +- .../full/sv/_global_mmlu_full_sv.yaml | 2 +- .../full/sw/_global_mmlu_full_sw.yaml | 2 +- .../full/te/_global_mmlu_full_te.yaml | 2 +- .../full/tr/_global_mmlu_full_tr.yaml | 2 +- .../full/uk/_global_mmlu_full_uk.yaml | 2 +- .../full/vi/_global_mmlu_full_vi.yaml | 2 +- .../full/yo/_global_mmlu_full_yo.yaml | 2 +- .../full/zh/_global_mmlu_full_zh.yaml | 2 +- 193 files changed, 1092 insertions(+), 146 deletions(-) delete mode 100644 lm_eval/tasks/global_mmlu/default/_generate_configs.py rename lm_eval/tasks/global_mmlu/default/{_default_yaml => ar/_ar_template_yaml} (95%) create mode 100644 lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml create mode 100644 
lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/de/_de_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml create mode 100644 
lm_eval/tasks/global_mmlu/default/de/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/en/_en_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/es/_es_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/utils.py delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml create mode 
100644 lm_eval/tasks/global_mmlu/default/hi/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/_id_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/_it_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml create mode 100644 
lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/utils.py diff --git a/lm_eval/tasks/global_mmlu/default/_generate_configs.py 
b/lm_eval/tasks/global_mmlu/default/_generate_configs.py deleted file mode 100644 index 58e169c6..00000000 --- a/lm_eval/tasks/global_mmlu/default/_generate_configs.py +++ /dev/null @@ -1,42 +0,0 @@ -import yaml - - -languages = [ - "en", - "ar", - "fr", - "es", - "hi", - "de", - "id", - "it", - "ja", - "ko", - "pt", - "zh", - "yo", - "bn", - "sw", -] - - -def main() -> None: - for language in languages: - file_name = f"global_mmlu_{language}.yaml" - try: - with open(f"{file_name}", "w") as f: - f.write("# Generated by _generate_configs.py\n") - yaml.dump( - { - "include": "_default_yaml", - "task": f"global_mmlu_{language}", - "dataset_name": language, - }, - f, - ) - except FileExistsError: - pass - - -if __name__ == "__main__": - main() diff --git a/lm_eval/tasks/global_mmlu/default/_default_yaml b/lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml similarity index 95% rename from lm_eval/tasks/global_mmlu/default/_default_yaml rename to lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml index 33a1fc35..3fa8f23f 100644 --- a/lm_eval/tasks/global_mmlu/default/_default_yaml +++ b/lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml @@ -1,6 +1,5 @@ -tag: - - global_mmlu dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ar test_split: test fewshot_split: dev fewshot_config: diff --git a/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml new file mode 100644 index 00000000..27f6e1a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ar +task: + - global_mmlu_ar_business + - global_mmlu_ar_humanities + - global_mmlu_ar_medical + - global_mmlu_ar_other + - global_mmlu_ar_stem + - global_mmlu_ar_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml 
b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml new file mode 100644 index 00000000..c7f47fdf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ar_business diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml new file mode 100644 index 00000000..c35f1f6e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ar_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml new file mode 100644 index 00000000..cb405486 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ar_medical diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml new file mode 100644 index 00000000..1ffd9be8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ar_other diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml new file mode 100644 index 00000000..037e25a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by 
_generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ar_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml new file mode 100644 index 00000000..f2ed28c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ar_stem diff --git a/lm_eval/tasks/global_mmlu/default/ar/utils.py b/lm_eval/tasks/global_mmlu/default/ar/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml b/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml new file mode 100644 index 00000000..c9a234db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: bn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml new file mode 100644 index 00000000..4098af1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_bn +task: + - global_mmlu_bn_business + - global_mmlu_bn_humanities + - global_mmlu_bn_medical + - global_mmlu_bn_other + - global_mmlu_bn_stem + - global_mmlu_bn_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml new file mode 100644 index 00000000..c77589c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_bn_business diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml new file mode 100644 index 00000000..da495c6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_bn_humanities diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml new file mode 100644 index 00000000..867e5e4e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml 
+process_docs: !function utils.process_medical +task: global_mmlu_bn_medical diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml new file mode 100644 index 00000000..c44b6d75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_bn_other diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml new file mode 100644 index 00000000..7bbde182 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_bn_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml new file mode 100644 index 00000000..433ba8b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_bn_stem diff --git a/lm_eval/tasks/global_mmlu/default/bn/utils.py b/lm_eval/tasks/global_mmlu/default/bn/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for 
category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml b/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml new file mode 100644 index 00000000..7c17e2d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: de +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml new file mode 100644 index 00000000..1a54aace --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_de +task: + - global_mmlu_de_business + - global_mmlu_de_humanities + - global_mmlu_de_medical + - global_mmlu_de_other + - global_mmlu_de_stem + - global_mmlu_de_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml new file mode 100644 index 00000000..eba9514c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_de_business diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml new file mode 100644 index 00000000..d37de491 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_de_humanities diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml new file mode 100644 index 00000000..f114de46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_de_medical diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml new file mode 100644 index 00000000..d6089b2d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_de_other diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml new file mode 100644 index 00000000..853711f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_de_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml new file mode 100644 index 00000000..ef66d3cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_de_stem diff --git 
a/lm_eval/tasks/global_mmlu/default/de/utils.py b/lm_eval/tasks/global_mmlu/default/de/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml b/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml new file mode 100644 index 00000000..e24d7983 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: en +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml new file mode 100644 index 00000000..fc927412 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_en +task: + - global_mmlu_en_business + - global_mmlu_en_humanities + - global_mmlu_en_medical + - global_mmlu_en_other + - global_mmlu_en_stem + - global_mmlu_en_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml new file mode 100644 index 00000000..aa3f4bc1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_en_business diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml new file mode 100644 index 00000000..c2a20e29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_en_humanities diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml new file mode 100644 index 00000000..ba991459 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml 
+process_docs: !function utils.process_medical +task: global_mmlu_en_medical diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml new file mode 100644 index 00000000..c14d7657 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_en_other diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml new file mode 100644 index 00000000..d576d2c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_en_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml new file mode 100644 index 00000000..fd0179f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_en_stem diff --git a/lm_eval/tasks/global_mmlu/default/en/utils.py b/lm_eval/tasks/global_mmlu/default/en/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for 
category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml b/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml new file mode 100644 index 00000000..b0942331 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: es +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml new file mode 100644 index 00000000..614b1b0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_es +task: + - global_mmlu_es_business + - global_mmlu_es_humanities + - global_mmlu_es_medical + - global_mmlu_es_other + - global_mmlu_es_stem + - global_mmlu_es_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml new file mode 100644 index 00000000..388251a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_es_business diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml new file mode 100644 index 00000000..fd51574b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_es_humanities diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml new file mode 100644 index 00000000..649ad70d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_es_medical diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml new file mode 100644 index 00000000..878251d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_es_other diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml new file mode 100644 index 00000000..1e97c6ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_es_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml new file mode 100644 index 00000000..45b4fa4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_es_stem diff --git 
a/lm_eval/tasks/global_mmlu/default/es/utils.py b/lm_eval/tasks/global_mmlu/default/es/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml b/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml new file mode 100644 index 00000000..a2c6fc88 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: fr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml b/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml new file mode 100644 index 00000000..d65a2e25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_fr +task: + - global_mmlu_fr_business + - global_mmlu_fr_humanities + - global_mmlu_fr_medical + - global_mmlu_fr_other + - global_mmlu_fr_stem + - global_mmlu_fr_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml new file mode 100644 index 00000000..49f8543b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_fr_business diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml new file mode 100644 index 00000000..35d0086b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_fr_humanities diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml new file mode 100644 index 00000000..e411a347 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml 
+process_docs: !function utils.process_medical +task: global_mmlu_fr_medical diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml new file mode 100644 index 00000000..5bece303 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_fr_other diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml new file mode 100644 index 00000000..4e26ceab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_fr_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml new file mode 100644 index 00000000..6d3d1538 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_fr_stem diff --git a/lm_eval/tasks/global_mmlu/default/fr/utils.py b/lm_eval/tasks/global_mmlu/default/fr/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for 
category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml deleted file mode 100644 index 703f420a..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ar -include: _default_yaml -task: global_mmlu_ar diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml deleted file mode 100644 index f85b67a2..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: bn -include: _default_yaml -task: global_mmlu_bn diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml deleted file mode 100644 index a874c64f..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: de -include: _default_yaml -task: global_mmlu_de diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml deleted file mode 100644 index 34a6d712..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: en -include: _default_yaml -task: global_mmlu_en diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml deleted file mode 100644 index 75abc775..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: es -include: _default_yaml -task: global_mmlu_es diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml 
b/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml deleted file mode 100644 index 1a66f536..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: fr -include: _default_yaml -task: global_mmlu_fr diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml deleted file mode 100644 index 788f95f2..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: hi -include: _default_yaml -task: global_mmlu_hi diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml deleted file mode 100644 index f4b6d507..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: id -include: _default_yaml -task: global_mmlu_id diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml deleted file mode 100644 index 5b55df97..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: it -include: _default_yaml -task: global_mmlu_it diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml deleted file mode 100644 index 97d9c6ca..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ja -include: _default_yaml -task: global_mmlu_ja diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml deleted file mode 100644 index 02b7fe03..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml +++ /dev/null 
@@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ko -include: _default_yaml -task: global_mmlu_ko diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml deleted file mode 100644 index 724bfb4d..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: pt -include: _default_yaml -task: global_mmlu_pt diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml deleted file mode 100644 index 481232fa..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: sw -include: _default_yaml -task: global_mmlu_sw diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml deleted file mode 100644 index c6ec2f9e..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: yo -include: _default_yaml -task: global_mmlu_yo diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml deleted file mode 100644 index 862d46ad..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: zh -include: _default_yaml -task: global_mmlu_zh diff --git a/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml new file mode 100644 index 00000000..406b27a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_hi +task: + - global_mmlu_hi_business + - global_mmlu_hi_humanities + - global_mmlu_hi_medical + - global_mmlu_hi_other + - 
global_mmlu_hi_stem + - global_mmlu_hi_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml b/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml new file mode 100644 index 00000000..180dee96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: hi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml new file mode 100644 index 00000000..63b516c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_hi_business diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml new file mode 100644 index 00000000..d8e888cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_hi_humanities diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml new file mode 100644 index 00000000..46a21957 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_hi_medical diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml new file mode 100644 index 00000000..ea242d7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_hi_other diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml new file mode 100644 index 00000000..df95b8c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_hi_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml new file mode 100644 index 00000000..acab4f12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_hi_stem diff --git a/lm_eval/tasks/global_mmlu/default/hi/utils.py b/lm_eval/tasks/global_mmlu/default/hi/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] 
== category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml new file mode 100644 index 00000000..cfe87f59 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_id +task: + - global_mmlu_id_business + - global_mmlu_id_humanities + - global_mmlu_id_medical + - global_mmlu_id_other + - global_mmlu_id_stem + - global_mmlu_id_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml b/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml new file mode 100644 index 00000000..fae80c32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: id +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml new file mode 100644 index 00000000..d8f7c1cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_id_business diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml new file mode 100644 index 00000000..459442d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_id_humanities diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml new file mode 100644 index 00000000..1fe61f13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_id_medical diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml new file mode 100644 index 00000000..dfdf7dd2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_id_other diff --git 
a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml new file mode 100644 index 00000000..8ac1ddf4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_id_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml new file mode 100644 index 00000000..a2230d33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_id_stem diff --git a/lm_eval/tasks/global_mmlu/default/id/utils.py b/lm_eval/tasks/global_mmlu/default/id/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml new file mode 100644 index 00000000..1378b765 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_it +task: + - global_mmlu_it_business + - global_mmlu_it_humanities + - global_mmlu_it_medical + - global_mmlu_it_other + - global_mmlu_it_stem + - 
global_mmlu_it_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml b/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml new file mode 100644 index 00000000..e6b1f56d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: it +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml new file mode 100644 index 00000000..dabac0a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_it_business diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml new file mode 100644 index 00000000..6d2c923f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_it_humanities diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml new file mode 100644 index 00000000..25d4efc5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml @@ -0,0 
+1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_it_medical diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml new file mode 100644 index 00000000..3e35260d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_it_other diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml new file mode 100644 index 00000000..bee79835 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_it_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml new file mode 100644 index 00000000..04502cef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_it_stem diff --git a/lm_eval/tasks/global_mmlu/default/it/utils.py b/lm_eval/tasks/global_mmlu/default/it/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + 
f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml new file mode 100644 index 00000000..098f3b57 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ja +task: + - global_mmlu_ja_business + - global_mmlu_ja_humanities + - global_mmlu_ja_medical + - global_mmlu_ja_other + - global_mmlu_ja_stem + - global_mmlu_ja_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml b/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml new file mode 100644 index 00000000..5f0e4cc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ja +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml new file mode 100644 index 00000000..19a5050a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ja_business diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml new file mode 100644 index 00000000..b2d83886 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ja_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml new file mode 100644 index 00000000..8c0695ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ja_medical diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml new file mode 100644 index 00000000..5e72d4c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ja_other diff --git 
a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml new file mode 100644 index 00000000..acdabd53 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ja_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml new file mode 100644 index 00000000..b9ab07cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ja_stem diff --git a/lm_eval/tasks/global_mmlu/default/ja/utils.py b/lm_eval/tasks/global_mmlu/default/ja/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml new file mode 100644 index 00000000..19f4f961 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ko +task: + - global_mmlu_ko_business + - global_mmlu_ko_humanities + - global_mmlu_ko_medical + - global_mmlu_ko_other + - global_mmlu_ko_stem + - 
global_mmlu_ko_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml b/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml new file mode 100644 index 00000000..364e159b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ko +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml new file mode 100644 index 00000000..2f1ce375 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ko_business diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml new file mode 100644 index 00000000..a613ff55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ko_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml new file mode 100644 index 00000000..7e871038 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml @@ -0,0 
+1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ko_medical diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml new file mode 100644 index 00000000..3fa1c608 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ko_other diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml new file mode 100644 index 00000000..ad5874f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ko_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml new file mode 100644 index 00000000..f6c7e8ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ko_stem diff --git a/lm_eval/tasks/global_mmlu/default/ko/utils.py b/lm_eval/tasks/global_mmlu/default/ko/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + 
f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml new file mode 100644 index 00000000..7a489c12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_pt +task: + - global_mmlu_pt_business + - global_mmlu_pt_humanities + - global_mmlu_pt_medical + - global_mmlu_pt_other + - global_mmlu_pt_stem + - global_mmlu_pt_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml b/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml new file mode 100644 index 00000000..f1db6629 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: pt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml new file mode 100644 index 00000000..1e72b168 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_pt_business diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml new file mode 100644 index 00000000..7244f2a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_pt_humanities diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml new file mode 100644 index 00000000..44776f2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_pt_medical diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml new file mode 100644 index 00000000..b6121201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_pt_other diff --git 
a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml new file mode 100644 index 00000000..949d346e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_pt_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml new file mode 100644 index 00000000..9f194c11 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_pt_stem diff --git a/lm_eval/tasks/global_mmlu/default/pt/utils.py b/lm_eval/tasks/global_mmlu/default/pt/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml new file mode 100644 index 00000000..b3913d24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_sw +task: + - global_mmlu_sw_business + - global_mmlu_sw_humanities + - global_mmlu_sw_medical + - global_mmlu_sw_other + - global_mmlu_sw_stem + - 
global_mmlu_sw_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml b/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml new file mode 100644 index 00000000..33edff38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: sw +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml new file mode 100644 index 00000000..a53ca478 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_sw_business diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml new file mode 100644 index 00000000..4687df76 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_sw_humanities diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml new file mode 100644 index 00000000..76240ea3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml @@ -0,0 
+1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_sw_medical diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml new file mode 100644 index 00000000..7c3bfda2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_sw_other diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml new file mode 100644 index 00000000..4a77aa2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_sw_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml new file mode 100644 index 00000000..d6faf18b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_sw_stem diff --git a/lm_eval/tasks/global_mmlu/default/sw/utils.py b/lm_eval/tasks/global_mmlu/default/sw/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + 
f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml new file mode 100644 index 00000000..14df221a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_yo +task: + - global_mmlu_yo_business + - global_mmlu_yo_humanities + - global_mmlu_yo_medical + - global_mmlu_yo_other + - global_mmlu_yo_stem + - global_mmlu_yo_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml b/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml new file mode 100644 index 00000000..6cdd0a03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: yo +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml new file mode 100644 index 00000000..162a97cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_yo_business diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml new file mode 100644 index 00000000..5befbc12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_yo_humanities diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml new file mode 100644 index 00000000..d48d0208 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_yo_medical diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml new file mode 100644 index 00000000..5e407c2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_yo_other diff --git 
a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml new file mode 100644 index 00000000..c85596aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_yo_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml new file mode 100644 index 00000000..a19e1e8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_yo_stem diff --git a/lm_eval/tasks/global_mmlu/default/yo/utils.py b/lm_eval/tasks/global_mmlu/default/yo/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml new file mode 100644 index 00000000..212a33fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_zh +task: + - global_mmlu_zh_business + - global_mmlu_zh_humanities + - global_mmlu_zh_medical + - global_mmlu_zh_other + - global_mmlu_zh_stem + - 
global_mmlu_zh_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml b/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml new file mode 100644 index 00000000..eeb1e7b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: zh +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml new file mode 100644 index 00000000..aa0a689a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_zh_business diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml new file mode 100644 index 00000000..823854b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_zh_humanities diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml new file mode 100644 index 00000000..f1f7a7d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml @@ -0,0 
+1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_zh_medical diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml new file mode 100644 index 00000000..a3beae83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_zh_other diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml new file mode 100644 index 00000000..1891a45a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_zh_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml new file mode 100644 index 00000000..6a9f4f05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_zh_stem diff --git a/lm_eval/tasks/global_mmlu/default/zh/utils.py b/lm_eval/tasks/global_mmlu/default/zh/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + 
f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml index 48fc270a..555bfd86 100644 --- a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml index 61f60b9b..83340da0 100644 --- a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml index f1c91f09..135b4bf5 100644 --- a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml index 977b0051..419d5de4 100644 --- a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml index c09da268..5217599a 100644 --- 
a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml index a77feecb..9d44d8c8 100644 --- a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml index 648a10dd..d4c82b64 100644 --- a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml index 832001c1..13d2eccf 100644 --- a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml index 9edb8540..282664e5 100644 --- a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml 
b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml index 24fcb6d2..659c9d4b 100644 --- a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml index e85d6746..7857a193 100644 --- a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml index 08a958bb..a5008417 100644 --- a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml index ff0a5e8f..4952391a 100644 --- a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml index ed54a6ad..c899be84 100644 --- a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 
diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml index f678660e..fd29a2ad 100644 --- a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml index a263e295..d5346e1c 100644 --- a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml index dabb8987..f3bcd694 100644 --- a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml index 103460d7..af25573d 100644 --- a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml index d2225e23..8b9b4197 100644 --- a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: 
acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml index 4774599a..33b2a4df 100644 --- a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml index 93929d42..dd2cd37d 100644 --- a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml index 05b55948..bcc6cd81 100644 --- a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml index e5a13645..a322dad5 100644 --- a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml index ec13a0be..7e9c77ce 100644 --- a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml +++ 
b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml index 44f562da..c293a558 100644 --- a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml index c325bf1d..80739c2c 100644 --- a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml index 2476fd33..deb51e11 100644 --- a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml index ac79bda1..7cb9f0fe 100644 --- a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml index b3aa5f49..f4e5575f 100644 --- 
a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml index cc63cd34..3fc05379 100644 --- a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml index 4deed570..6503e267 100644 --- a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml index 98ced987..fd084b32 100644 --- a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml index 014a4121..91e11b88 100644 --- a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml 
b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml index e322d980..1e0c55a5 100644 --- a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml index a9b0dc1b..078de8b6 100644 --- a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml index 274543cf..fd87d068 100644 --- a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml index 5ef0f7ab..0bc967de 100644 --- a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml index 8cd3d3f3..aa4ae63f 100644 --- a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff 
--git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml index e880be32..9bbd14cb 100644 --- a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml index d6413b35..27888474 100644 --- a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml index ba9f2460..ef4930b6 100644 --- a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml index 098ec097..926c8f88 100644 --- a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 -- GitLab From a0466f01b0c0df4c31e4ef357935c69f7b38476b Mon Sep 17 00:00:00 2001 From: Irina Proskurina <72871167+upunaprosk@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:02:44 +0100 Subject: [PATCH 13/19] Add Moral Stories (#2653) * Add moral stories task * Add moral stories task * Create README.md * Update README.md * Update line endings in 
moral_stories files --- lm_eval/tasks/README.md | 1 + lm_eval/tasks/moral_stories/README.md | 71 +++++++++++++++++++ .../tasks/moral_stories/moral_stories.yaml | 20 ++++++ lm_eval/tasks/moral_stories/utils.py | 21 ++++++ 4 files changed, 113 insertions(+) create mode 100644 lm_eval/tasks/moral_stories/README.md create mode 100644 lm_eval/tasks/moral_stories/moral_stories.yaml create mode 100644 lm_eval/tasks/moral_stories/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index c92043bc..618f2c6e 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -86,6 +86,7 @@ | [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | | [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | | model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | +| [moral_stories](moral_stories/README.md) | A crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | English | | [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | | [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | | [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges.
| Multiple (31 languages) **Machine Translated.** | diff --git a/lm_eval/tasks/moral_stories/README.md b/lm_eval/tasks/moral_stories/README.md new file mode 100644 index 00000000..14d6fb2e --- /dev/null +++ b/lm_eval/tasks/moral_stories/README.md @@ -0,0 +1,71 @@ +# Moral Stories + +### Paper + +Title: `Moral Stories: Situated Reasoning about Norms, Intents, Actions, and their Consequences` + +Abstract: `https://aclanthology.org/2021.emnlp-main.54/` + +Moral Stories is a crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations, and their respective consequences. All stories in the dataset consist of seven sentences, belonging to the following categories: + +- Norm: A guideline for social conduct generally observed by most people in everyday situations. +- Situation: Setting of the story that introduces story participants and describes their environment. +- Intention: Reasonable goal that one of the story participants (the actor) wants to fulfill. +- Normative action: An action by the actor that fulfills the intention and observes the norm. +- Normative consequence: Possible effect of the normative action on the actor's environment. +- Divergent action: An action by the actor that fulfills the intention and diverges from the norm. +- Divergent consequence: Possible effect of the divergent action on the actor's environment. + + +Homepage: `https://github.com/demelin/moral_stories` + +The implementation is based on the paper "Histoires Morales: A French Dataset for Assessing Moral Alignment." The source code is available at: `https://github.com/upunaprosk/histoires-morales`. + +### Citation + +``` +@inproceedings{emelin-etal-2021-moral, + title = "Moral Stories: Situated Reasoning about Norms, Intents, Actions, and their Consequences", + author = "Emelin, Denis and + Le Bras, Ronan and + Hwang, Jena D.
and + Forbes, Maxwell and + Choi, Yejin", + booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing", + month = nov, + year = "2021", + address = "Online and Punta Cana, Dominican Republic", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2021.emnlp-main.54", + doi = "10.18653/v1/2021.emnlp-main.54", + pages = "698--718", + abstract = "In social settings, much of human behavior is governed by unspoken rules of conduct rooted in societal norms. For artificial systems to be fully integrated into social environments, adherence to such norms is a central prerequisite. To investigate whether language generation models can serve as behavioral priors for systems deployed in social settings, we evaluate their ability to generate action descriptions that achieve predefined goals under normative constraints. Moreover, we examine if models can anticipate likely consequences of actions that either observe or violate known norms, or explain why certain actions are preferable by generating relevant norm hypotheses. For this purpose, we introduce Moral Stories, a crowd-sourced dataset of structured, branching narratives for the study of grounded, goal-oriented social reasoning. Finally, we propose decoding strategies that combine multiple expert models to significantly improve the quality of generated actions, consequences, and norms compared to strong baselines.", +} +``` + +### Groups, Tags, and Tasks + +#### Groups + +* Not part of a group yet + +#### Tags + +* `moral_stories`: `Evaluation of the likelihoods of moral actions versus immoral actions. Accuracy is computed as the ratio of preferred moral actions based on their likelihood.` + +#### Tasks + +* `moral_stories.yaml` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? 
+ * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? diff --git a/lm_eval/tasks/moral_stories/moral_stories.yaml b/lm_eval/tasks/moral_stories/moral_stories.yaml new file mode 100644 index 00000000..0d8e3d99 --- /dev/null +++ b/lm_eval/tasks/moral_stories/moral_stories.yaml @@ -0,0 +1,20 @@ +tag: + - moral_stories +task: moral_stories +dataset_path: demelin/moral_stories +dataset_name: full +output_type: multiple_choice +test_split: train +process_docs: !function utils.process_docs +doc_to_text: "{{query}}" +doc_to_target: "{{label}}" +doc_to_choice: "choices" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/moral_stories/utils.py b/lm_eval/tasks/moral_stories/utils.py new file mode 100644 index 00000000..2e996b74 --- /dev/null +++ b/lm_eval/tasks/moral_stories/utils.py @@ -0,0 +1,21 @@ +import datasets + + +def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: + def _process_doc(doc): + ctx = ( + doc["norm"].capitalize() + + " " + + doc["situation"].capitalize() + + " " + + doc["intention"].capitalize() + ) + choices = [doc["moral_action"], doc["immoral_action"]] + out_doc = { + "query": ctx, + "choices": choices, + "label": 0, + } + return out_doc + + return dataset.map(_process_doc) -- GitLab From 42f791317d3e96dc48faf4f9d7590e38f47e6d24 Mon Sep 17 00:00:00 2001 From: Nicky Pochinkov <52249105+nickypro@users.noreply.github.com> Date: Tue, 28 Jan 2025 17:03:18 +0000 Subject: [PATCH 
14/19] add TransformerLens example (#2651) * add TransformerLens example Many people use TransformerLens to do interpretability and interventions on models, and then need to test the model. Here is a simple script that allows one to pass in the TransformerLens model and run evaluations on it. * Ran pre-commit checks --- examples/transformer-lens.py | 59 ++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 examples/transformer-lens.py diff --git a/examples/transformer-lens.py b/examples/transformer-lens.py new file mode 100644 index 00000000..e03576b1 --- /dev/null +++ b/examples/transformer-lens.py @@ -0,0 +1,59 @@ +import warnings + +import torch +import torch.nn as nn +from transformer_lens import HookedTransformer +from transformers import AutoConfig + +from lm_eval import evaluator +from lm_eval.models.huggingface import HFLM + + +def evaluate_lm_eval(lens_model: HookedTransformer, tasks: list[str], **kwargs): + class HFLikeModelAdapter(nn.Module): + """Adapts HookedTransformer to match the HuggingFace interface expected by lm-eval""" + + def __init__(self, model: HookedTransformer): + super().__init__() + self.model = model + self.tokenizer = model.tokenizer + self.config = AutoConfig.from_pretrained(model.cfg.tokenizer_name) + self.device = model.cfg.device + self.tie_weights = lambda: self + + def forward(self, input_ids=None, attention_mask=None, **kwargs): + output = self.model(input_ids, attention_mask=attention_mask, **kwargs) + # Make sure output has the expected .logits attribute + if not hasattr(output, "logits"): + if isinstance(output, torch.Tensor): + output.logits = output + return output + + # Only delegate specific attributes we know we need + def to(self, *args, **kwargs): + return self.model.to(*args, **kwargs) + + def eval(self): + self.model.eval() + return self + + def train(self, mode=True): + self.model.train(mode) + return self + + model = HFLikeModelAdapter(lens_model) + 
+ warnings.filterwarnings("ignore", message="Failed to get model SHA for") + results = evaluator.simple_evaluate( + model=HFLM(pretrained=model, tokenizer=model.tokenizer), + tasks=tasks, + verbosity="WARNING", + **kwargs, + ) + return results + + +if __name__ == "__main__": + # Load base model + model = HookedTransformer.from_pretrained("pythia-70m") + res = evaluate_lm_eval(model, tasks=["arc_easy"]) + print(res["results"]) -- GitLab From 96e499baf4fb9a382d7fa3f0bc533d3d20ea72fc Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Tue, 28 Jan 2025 17:04:20 +0000 Subject: [PATCH 15/19] fix multiple input chat template (#2576) * feat: drop Python 3.8 support * feat: drop Python 3.8 tests * pre-commit * handle chat_template for multiple input --- lm_eval/api/task.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index f14f36e8..dc44150e 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -456,6 +456,7 @@ class Task(abc.ABC): ctx=fewshot_ctx, metadata=(self.config["task"], doc_id, self.config.repeats), apply_chat_template=apply_chat_template, + chat_template=chat_template, ) if not isinstance(inst, list): @@ -1098,6 +1099,8 @@ class ConfigurableTask(Task): if apply_chat_template: if self.multiple_input: # TODO: append prefill?
+ if not labeled_examples: + return "" return chat_template(labeled_examples) if isinstance(example, str): self.append_target_question( @@ -1350,6 +1353,7 @@ class ConfigurableTask(Task): self, doc: dict, ctx: str, **kwargs ) -> Union[List[Instance], Instance]: apply_chat_template = kwargs.pop("apply_chat_template", False) + chat_template: Callable | None = kwargs.pop("chat_template", None) aux_arguments = None @@ -1364,9 +1368,20 @@ class ConfigurableTask(Task): target_delimiter = "" if self.multiple_input: # If there are multiple inputs, choices are placed in the ctx + # apply chat_template to choices if apply_chat_template cont = self.doc_to_target(doc) + arguments = [ - (ctx + choice, f"{target_delimiter}{cont}") for choice in choices + ( + ctx + + ( + chat_template([{"role": "user", "content": choice}]) + if apply_chat_template + else choice + ), + f"{target_delimiter}{cont}", + ) + for choice in choices ] else: # Otherwise they are placed in the continuation -- GitLab From 94344a61da1f79536542454cc9c26f99c36b978f Mon Sep 17 00:00:00 2001 From: Seungwoo Ryu Date: Wed, 29 Jan 2025 04:56:34 +0900 Subject: [PATCH 16/19] Add Aggregation for Kobest Benchmark (#2446) Co-authored-by: Baber --- lm_eval/tasks/kobest/_kobest.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 lm_eval/tasks/kobest/_kobest.yaml diff --git a/lm_eval/tasks/kobest/_kobest.yaml b/lm_eval/tasks/kobest/_kobest.yaml new file mode 100644 index 00000000..cf23f664 --- /dev/null +++ b/lm_eval/tasks/kobest/_kobest.yaml @@ -0,0 +1,19 @@ +group: kobest +task: + - kobest_boolq + - kobest_copa + - kobest_hellaswag + - kobest_sentineg + - kobest_wic +aggregate_metric_list: + - metric: acc + aggregation: mean + weight_by_size: true + - metric: acc_norm + aggregation: mean + weight_by_size: true + - metric: f1 + aggregation: mean + weight_by_size: true +metadata: + version: 1.0 -- GitLab From 4b4b036318f6afd880969ad50be81a20768cfeb5 Mon Sep 17 00:00:00 2001 From: Baber Abbasi 
<92168766+baberabb@users.noreply.github.com> Date: Tue, 28 Jan 2025 21:11:25 +0000 Subject: [PATCH 17/19] update pre-commit (#2660) * nit * update pre-commit --- .pre-commit-config.yaml | 4 ++-- lm_eval/tasks/moral_stories/moral_stories.yaml | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3b5da239..a2465d0e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: mixed-line-ending args: [--fix=lf] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.2 + rev: v0.9.3 hooks: # Run the linter. - id: ruff @@ -38,7 +38,7 @@ repos: # Run the formatter. - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.1 hooks: - id: codespell exclude: > diff --git a/lm_eval/tasks/moral_stories/moral_stories.yaml b/lm_eval/tasks/moral_stories/moral_stories.yaml index 0d8e3d99..46f94b9c 100644 --- a/lm_eval/tasks/moral_stories/moral_stories.yaml +++ b/lm_eval/tasks/moral_stories/moral_stories.yaml @@ -1,5 +1,3 @@ -tag: - - moral_stories task: moral_stories dataset_path: demelin/moral_stories dataset_name: full -- GitLab From fe9c5707f58f41db0539229c2d55f9c164e98260 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Wed, 29 Jan 2025 15:45:57 +0000 Subject: [PATCH 18/19] remove `group` from bigbench task configs (#2663) * remove group from task configs * add tags * update readme --- lm_eval/tasks/bigbench/README.md | 6 ++++++ lm_eval/tasks/bigbench/generate_until_template_yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice_template_a_yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice_template_b_yaml | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lm_eval/tasks/bigbench/README.md b/lm_eval/tasks/bigbench/README.md index be680eac..268f75b6 100644 --- a/lm_eval/tasks/bigbench/README.md +++ b/lm_eval/tasks/bigbench/README.md @@ -30,6 +30,12 @@ Homepage: 
https://github.com/google/BIG-bench * `group_name`: `Short description` +#### Tags + +* `bigbench_generate_until` +* `bigbench_multiple_choice_a` +* `bigbench_multiple_choice_b` + #### Tasks * `task_name`: `1-sentence description of what this particular task does` diff --git a/lm_eval/tasks/bigbench/generate_until_template_yaml b/lm_eval/tasks/bigbench/generate_until_template_yaml index 25593a4e..c8c30600 100644 --- a/lm_eval/tasks/bigbench/generate_until_template_yaml +++ b/lm_eval/tasks/bigbench/generate_until_template_yaml @@ -1,4 +1,4 @@ -group: bigbench_generate_until +tag: bigbench_generate_until dataset_path: hails/bigbench output_type: generate_until dataset_kwargs: diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml index 4b5f9e89..de210a41 100644 --- a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml +++ b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml @@ -1,4 +1,4 @@ -group: bigbench_multiple_choice +tag: bigbench_multiple_choice_a dataset_path: hails/bigbench dataset_kwargs: # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml index 2900103e..dc695c98 100644 --- a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml +++ b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml @@ -1,4 +1,4 @@ -group: bigbench_multiple_choice +tag: bigbench_multiple_choice_b dataset_path: hails/bigbench dataset_kwargs: # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods -- GitLab From 1208afd34ce132e598fcd7e832762630a35d01c6 Mon Sep 17 00:00:00 2001 From: Irina Proskurina <72871167+upunaprosk@users.noreply.github.com> Date: Wed, 29 Jan 2025 16:52:14 +0100 Subject: [PATCH 19/19] Add Histoires Morales task (#2662) 
* Add Histoires Morales task * Histoires Morales task: fix mixed line endings * Histoires Morales task: fix mixed line endings * Remove tag for a single task * Add some MT for Histoires Morales --- lm_eval/tasks/README.md | 265 +++++++++--------- lm_eval/tasks/histoires_morales/README.md | 62 ++++ .../histoires_morales/histoires_morales.yaml | 17 ++ lm_eval/tasks/histoires_morales/utils.py | 21 ++ 4 files changed, 233 insertions(+), 132 deletions(-) create mode 100644 lm_eval/tasks/histoires_morales/README.md create mode 100644 lm_eval/tasks/histoires_morales/histoires_morales.yaml create mode 100644 lm_eval/tasks/histoires_morales/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 618f2c6e..4f14e9ec 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -5,137 +5,138 @@ For more information, including a full list of task names and their precise meanings or sources, follow the links provided to the individual README.md files for each subfolder. -| Task Family | Description | Language(s) | -|-------------|-------------|-------------------------------------------------------------------------------------------------------------------------------| -| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | -| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | -| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | -| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. 
| English | -| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | -| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | -| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. | Arabic | -| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | -| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | -| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | -| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | -| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | -| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | -| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | -| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. 
| English, German | -| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | -| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | -| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. | English, Basque, Basque (MT) | -| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | -| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | -| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | -| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. | Chinese | -| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | -| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | -| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | -| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | -| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | -| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | -| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | -| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. 
| English | -| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | -| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | -| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | -| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | -| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | -| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | -| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | -| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French | -| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | -| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | -| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | -| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | -| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | -| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | -| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. 
| Spanish, English | -| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | -| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | -| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. | English | -| [hrm8k](hrm8k/README.md) | A challenging bilingual math reasoning benchmark for Korean and English. | Korean (Some MT), English (Some MT) | -| [humaneval](humaneval/README.md) | Code generation task that measure functional correctness for synthesizing programs from docstrings. | Python | -| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | -| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | -| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | -| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | -| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | -| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | -| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | -| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | -| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | -| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. 
This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | +| Task Family | Description | Language(s) | +|--------------------------------------------------------------------------|-------------|-------------------------------------------------------------------------------------------------------------------------------| +| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | +| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | +| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | +| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. | English | +| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. 
| Arabic | +| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | +| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | +| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | +| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | +| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | +| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | +| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | +| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. | English, German | +| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | +| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | +| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. | English, Basque, Basque (MT) | +| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | +| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | +| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | +| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. 
| Chinese | +| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | +| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | +| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | +| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | +| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | +| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | +| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | +| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. | English | +| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | +| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | +| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | +| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | +| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | +| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | +| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. 
| English | +| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French | +| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | +| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | +| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | +| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | +| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | +| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | +| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. | Spanish, English | +| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | +| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | +| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. | English | +| [histoires_morales](histoires_morales/README.md) | A dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | French (Some MT) | +| [hrm8k](hrm8k/README.md) | A challenging bilingual math reasoning benchmark for Korean and English. 
| Korean (Some MT), English (Some MT) | +| [humaneval](humaneval/README.md) | Code generation task that measures functional correctness for synthesizing programs from docstrings. | Python | +| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | +| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | +| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | +| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | +| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | +| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in the Korean language. | Korean | +| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | +| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | +| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | +| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | | [lambada_multilingual_stablelm](lambada_multilingual_stablelm/README.md) | Multilingual LAMBADA dataset. Users should prefer evaluating on this version of the multilingual dataset instead of on `lambada_multilingual`.
| German, English, Spanish, French, Italian, Dutch, Portuguese | -| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time | English | -| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization | English, Multilingual | -| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | -| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | -| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | -| [mbpp](mbpp/README.md) | A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions. | Python | -| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | -| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept. | English | -| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | -| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. | English | -| medqa | Multiple choice question answering based on the United States Medical License Exams. | | -| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. 
| Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | -| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | -| [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | -| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | -| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | -| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | -| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | -| [moral_stories](moral_stories/README.md) | A crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | English -| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | -| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | -| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | -| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | -| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. 
| Multiple (34 languages) **Machine Translated.** | +| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time | English | +| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization | English, Multilingual | +| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | +| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | +| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | +| [mbpp](mbpp/README.md) | A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions. | Python | +| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | +| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts. | English | +| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | +| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. | English | +| medqa | Multiple choice question answering based on the United States Medical License Exams. | | +| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. | Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | +| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. 
| English | +| [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | +| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | +| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | +| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | +| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | +| [moral_stories](moral_stories/README.md) | A crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | English | +| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | +| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | +| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | +| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | +| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (34 languages) **Machine Translated.** | | [okapi/truthfulqa_multilingual](okapi/truthfulqa_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. 
| Multiple (31 languages) **Machine Translated.** | -| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | -| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | -| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | -| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | -| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. | English | -| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | -| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | -| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | -| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | -| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | -| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | -| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | -| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. 
| English | -| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | -| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | -| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH) | English | -| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | -| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | -| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | -| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. | English | -| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | -| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | -| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | -| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | -| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | -| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | -| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | -| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. 
| English | -| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | -| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | -| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | -| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | -| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. | English | -| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | -| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | -| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | -| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | -| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | -| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | -| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. 
| English | -| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | -| [xnli](xnli/README.md) | Cross-Lingual Natural Language Inference to test understanding across different languages. | Arabic, Bulgarian, German, Greek, English, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese, Chinese | -| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | -| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | -| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. | Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | -| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | +| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | +| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | +| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | +| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | +| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. 
| English | +| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | +| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | +| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | +| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | +| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | +| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | +| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | +| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. | English | +| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | +| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | +| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH) | English | +| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | +| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | +| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | +| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. 
| English | +| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | +| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | +| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | +| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | +| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | +| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | +| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | +| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. | English | +| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | +| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | +| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | +| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | +| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. 
| English | +| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | +| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | +| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | +| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | +| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | +| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | +| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | +| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | +| [xnli](xnli/README.md) | Cross-Lingual Natural Language Inference to test understanding across different languages. | Arabic, Bulgarian, German, Greek, English, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese, Chinese | +| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | +| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | +| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. 
| Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | +| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | diff --git a/lm_eval/tasks/histoires_morales/README.md b/lm_eval/tasks/histoires_morales/README.md new file mode 100644 index 00000000..cbd14ec3 --- /dev/null +++ b/lm_eval/tasks/histoires_morales/README.md @@ -0,0 +1,62 @@ +# Histoires Morales + +### Paper + +Title: `Histoires Morales: A French Dataset for Assessing Moral Alignment` + +Abstract: `https://arxiv.org/pdf/2501.17117` + +⚖ Histoires Morales is the first dataset for moral model alignment evaluation in French. It consists of narratives describing normative and norm-divergent actions taken by individuals to achieve certain intentions in concrete situations, along with their respective consequences. +Each of the 12,000 stories (histoires) follows the same seven-sentence structure as the Moral Stories dataset: + +Context: + +1. Norm: A guideline for social conduct generally observed by most people in everyday situations. +2. Situation: The setting of the story, introducing participants and describing their environment. +3. Intention: A reasonable goal that one of the story participants (the actor) wants to achieve. + +Normative path: +4. Normative action: An action by the actor that fulfills the intention while observing the norm. +5. Normative consequence: A possible effect of the normative action on the actor’s environment. + +Norm-divergent path: +6. Divergent action: An action by the actor that fulfills the intention but diverges from the norm. +7. Divergent consequence: A possible effect of the divergent action on the actor’s environment. + +Histoires Morales is adapted to French from the widely used Moral Stories dataset. +We translated the Moral Stories dataset and refined these translations through manual annotations. 
+See paper for more details. + +Homepage: `https://huggingface.co/datasets/LabHC/histoires_morales` + + +### Citation + +Coming soon (accepted to NAACL 2025) + +### Groups, Tags, and Tasks + +#### Groups + +* Not part of a group yet + +#### Tags + +No tags, since there is a single task. + +#### Tasks + +* `histoires_morales.yaml` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? 
diff --git a/lm_eval/tasks/histoires_morales/histoires_morales.yaml b/lm_eval/tasks/histoires_morales/histoires_morales.yaml new file mode 100644 index 00000000..88fcc402 --- /dev/null +++ b/lm_eval/tasks/histoires_morales/histoires_morales.yaml @@ -0,0 +1,17 @@ +task: histoires_morales +dataset_path: LabHC/histoires_morales +output_type: multiple_choice +test_split: train +process_docs: !function utils.process_docs +doc_to_text: "{{query}}" +doc_to_target: "{{label}}" +doc_to_choice: "choices" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/histoires_morales/utils.py b/lm_eval/tasks/histoires_morales/utils.py new file mode 100644 index 00000000..2e996b74 --- /dev/null +++ b/lm_eval/tasks/histoires_morales/utils.py @@ -0,0 +1,21 @@ +import datasets + + +def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: + def _process_doc(doc): + ctx = ( + doc["norm"].capitalize() + + " " + + doc["situation"].capitalize() + + " " + + doc["intention"].capitalize() + ) + choices = [doc["moral_action"], doc["immoral_action"]] + out_doc = { + "query": ctx, + "choices": choices, + "label": 0, + } + return out_doc + + return dataset.map(_process_doc) -- GitLab