diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index ed09225cb3cb10bac7496da598f428dc69261124..7e69aee0bfd7c0b375f8d87f2ed3091e82aae6c8 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -22,10 +22,10 @@ jobs: steps: - name: Checkout Code uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.9 cache: pip cache-dependency-path: pyproject.toml - name: Pre-Commit @@ -42,7 +42,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11" ] + python-version: ["3.9", "3.10", "3.11", "3.12" ] timeout-minutes: 30 steps: - name: Checkout Code @@ -75,15 +75,16 @@ jobs: steps: - name: Checkout Code uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.9 cache: pip cache-dependency-path: pyproject.toml - name: Install dependencies run: | python -m pip install --upgrade pip pip install -e '.[dev,optimum,deepsparse,sparseml,api]' --extra-index-url https://download.pytorch.org/whl/cpu + pip install -U transformers peft - name: Test with pytest run: python -m pytest tests/models --showlocals -s -vv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index edeef333a5cba74d4fe20d5a74ce6cf82963802f..3b5da239236951104993af4ed1e231ecd8c5919a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: mixed-line-ending args: [--fix=lf] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.4 + rev: v0.9.2 hooks: # Run the linter. 
- id: ruff diff --git a/README.md b/README.md index 8bea4748526dfa5cf47ed56dbc8b73b314e8dc87..1dc08d55a5ac3105d6948b8c39abe77738e25117 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,7 @@ Note that for externally hosted models, configs such as `--device` which relate | vLLM | :heavy_check_mark: | `vllm` | [Most HF Causal Language Models](https://docs.vllm.ai/en/latest/models/supported_models.html) | `generate_until`, `loglikelihood`, `loglikelihood_rolling` | | Mamba | :heavy_check_mark: | `mamba_ssm` | [Mamba architecture Language Models via the `mamba_ssm` package](https://huggingface.co/state-spaces) | `generate_until`, `loglikelihood`, `loglikelihood_rolling` | | Huggingface Optimum (Causal LMs) | ✔️ | `openvino` | Any decoder-only AutoModelForCausalLM converted with Huggingface Optimum into OpenVINO™ Intermediate Representation (IR) format | `generate_until`, `loglikelihood`, `loglikelihood_rolling` | ... | +| Huggingface Optimum-intel IPEX (Causal LMs) | ✔️ | `ipex` | Any decoder-only AutoModelForCausalLM | `generate_until`, `loglikelihood`, `loglikelihood_rolling` | ... | | Neuron via AWS Inf2 (Causal LMs) | ✔️ | `neuronx` | Any decoder-only AutoModelForCausalLM supported to run on [huggingface-ami image for inferentia2](https://aws.amazon.com/marketplace/pp/prodview-gr3e6yiscria2) | `generate_until`, `loglikelihood`, `loglikelihood_rolling` | ... | | [Neural Magic DeepSparse](https://github.com/neuralmagic/deepsparse) | ✔️ | `deepsparse` | Any LM from [SparseZoo](https://sparsezoo.neuralmagic.com/) or on [HF Hub with the "deepsparse" tag](https://huggingface.co/models?other=deepsparse) | `generate_until`, `loglikelihood` | ... | | [Neural Magic SparseML](https://github.com/neuralmagic/sparseml) | ✔️ | `sparseml` | Any decoder-only AutoModelForCausalLM from [SparseZoo](https://sparsezoo.neuralmagic.com/) or on [HF Hub](https://huggingface.co/neuralmagic). 
Especially useful for models with quantization like [`zoo:llama2-7b-gsm8k_llama2_pretrain-pruned60_quantized`](https://sparsezoo.neuralmagic.com/models/llama2-7b-gsm8k_llama2_pretrain-pruned60_quantized) | `generate_until`, `loglikelihood`, `loglikelihood_rolling` | ... | @@ -492,6 +493,7 @@ Extras dependencies can be installed via `pip install -e ".[NAME]"` | hf_transfer | For speeding up HF Hub file downloads | | ifeval | For running the IFEval task | | ibm_watsonx_ai | For using IBM watsonx.ai model apis | +| ipex | For running on optimum-intel ipex backend | | neuronx | For running on AWS inf2 instances | | mamba | For loading Mamba SSM models | | math | For running math task answer checking | diff --git a/docs/interface.md b/docs/interface.md index 47cf00b49694bdbdd86a431c318f0497a2cb4f5a..cea1aab027bccdb7a12df12fda56d40202f256ac 100644 --- a/docs/interface.md +++ b/docs/interface.md @@ -58,7 +58,7 @@ This mode supports a number of command-line arguments, the details of which can * `--seed`: Set seed for python's random, numpy and torch. Accepts a comma-separated list of 3 values for python's random, numpy, and torch seeds, respectively, or a single integer to set the same seed for all three. The values are either an integer or 'None' to not set the seed. Default is `0,1234,1234` (for backward compatibility). E.g. `--seed 0,None,8` sets `random.seed(0)` and `torch.manual_seed(8)`. Here numpy's seed is not set since the second value is `None`. E.g, `--seed 42` sets all three seeds to 42. -* `--wandb_args`: Tracks logging to Weights and Biases for evaluation runs and includes args passed to `wandb.init`, such as `project` and `job_type`. Full list [here](https://docs.wandb.ai/ref/python/init). e.g., ```--wandb_args project=test-project,name=test-run``` +* `--wandb_args`: Tracks logging to Weights and Biases for evaluation runs and includes args passed to `wandb.init`, such as `project` and `job_type`. Full list [here](https://docs.wandb.ai/ref/python/init). 
e.g., ```--wandb_args project=test-project,name=test-run```. It also accepts a `step` argument specifying the step at which to log results (passed to `wandb.run.log`), e.g., `--wandb_args step=123`. * `--hf_hub_log_args` : Logs evaluation results to Hugging Face Hub. Accepts a string with the arguments separated by commas. Available arguments: * `hub_results_org` - organization name on Hugging Face Hub, e.g., `EleutherAI`. If not provided, the results will be pushed to the owner of the Hugging Face token, diff --git a/docs/new_task_guide.md b/docs/new_task_guide.md index dac8541e82e5175ac602c440f2be4b7eb8abd18a..a822a8879e8f9825a257fd00493667521fb6a07d 100644 --- a/docs/new_task_guide.md +++ b/docs/new_task_guide.md @@ -190,7 +190,8 @@ doc_to_target: "{{answer}}" ``` -**Important**: we now add `target_delimiter` between input and target which defaults to " ", such that the full input-output string is `doc_to_target(doc) + target_delimiter + doc_to_text(doc)`. `doc_to_text` and `doc_to_target` should not contain trailing right or left whitespace, respectively. +> [!WARNING] +> We add `target_delimiter` between input and target which defaults to " ", such that the full input-output string is `doc_to_text(doc) + target_delimiter + doc_to_target(doc)`. `doc_to_text` should not end with whitespace and `doc_to_target` should not start with whitespace. For multiple choice the target will be each choice index concatenated with the delimiter. #### Multiple choice format @@ -206,7 +207,7 @@ doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}" ``` Task implementers are thus able to decide what the answer choices should be for a document, and what prompt format to use. -The label index can also be sourced from a feature directly. For example in `superglue/boolq`, the label index if defined in the feature `label`. We can set `doc_to_target` as simply `label`.
The options or verbalizers can be written in a the form of a list `["no", "yes"]` that will correspond to the label index. +The label index can also be sourced from a feature directly. For example in `superglue/boolq`, the label index is defined in the feature `label`. We can set `doc_to_target` as simply `label`. The options or verbalizers can be written in the form of a list `["no", "yes"]` that will correspond to the label index. ```yaml doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" diff --git a/docs/task_guide.md b/docs/task_guide.md index 34e47c413694eeb8da2d3dc5c743eaba2740e0b0..23fbd1b9b5ddf12eba4b8017004c43f1c9e9d4d9 100644 --- a/docs/task_guide.md +++ b/docs/task_guide.md @@ -37,6 +37,7 @@ Prompting / in-context formatting options: - **doc_to_choice** (`Union[Callable, str]`, *optional*) — Jinja2 template, string, or function to process a sample into a list of possible string choices for `multiple_choice` tasks. Left undefined for `generate_until` tasks. - **fewshot_delimiter** (`str`, *optional*, defaults to "\n\n") — String to insert between few-shot examples. - **target_delimiter** (`str`, *optional*, defaults to `" "`) — String to insert between input and target output for the datapoint being tested. +- **gen_prefix** (`str`, *optional*) — String to append after the <|assistant|> token. For example, if the task is to answer a question, the gen_prefix could be "The answer is: " to prompt the model to generate an answer to the question. If not using a chat template then this string will be appended to the end of the prompt. Runtime configuration options: - **num_fewshot** (`int`, *optional*, defaults to 0) — Number of few-shot examples before the input.
diff --git a/lm_eval/__main__.py b/lm_eval/__main__.py index ab68781939599c9fe959c5b642ff385db1067510..8c72f4b18b2ccaac2e1c0753669686d4963b9ea1 100644 --- a/lm_eval/__main__.py +++ b/lm_eval/__main__.py @@ -257,6 +257,11 @@ def setup_parser() -> argparse.ArgumentParser: action="store_true", help="Sets trust_remote_code to True to execute code to create HF Datasets from the Hub", ) + parser.add_argument( + "--confirm_run_unsafe_code", + action="store_true", + help="Confirm that you understand the risks of running unsafe code for tasks that require it", + ) return parser @@ -404,6 +409,7 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None: numpy_random_seed=args.seed[1], torch_random_seed=args.seed[2], fewshot_random_seed=args.seed[3], + confirm_run_unsafe_code=args.confirm_run_unsafe_code, **request_caching_args, ) diff --git a/lm_eval/api/group.py b/lm_eval/api/group.py index e258692b9fad1cf570a2423c05d25d1604885d7e..0c60739bbd26c79ecab91f54240798b2ae9e3313 100644 --- a/lm_eval/api/group.py +++ b/lm_eval/api/group.py @@ -112,6 +112,4 @@ class ConfigurableGroup(abc.ABC): return self._config.group def __repr__(self): - return ( - f"ConfigurableGroup(group={self.group}," f"group_alias={self.group_alias})" - ) + return f"ConfigurableGroup(group={self.group},group_alias={self.group_alias})" diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index a8459aa7397fd02947917dad616520bb4cb777bd..56ba231be515e1abad897cf211bd03c377f7a31f 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -527,9 +527,9 @@ def pooled_sample_stderr(stderrs: List[float], sizes: List[int]): def combined_sample_stderr(stderrs: List[float], sizes: List[int], metrics=None): - assert ( - metrics is not None - ), "Need to pass a list of each subtask's metric for this stderr aggregation" + assert metrics is not None, ( + "Need to pass a list of each subtask's metric for this stderr aggregation" + ) assert len(stderrs) == len(sizes) and len(sizes) == 
len(metrics) # See https://github.com/EleutherAI/lm-evaluation-harness/pull/1390 for more documentation. diff --git a/lm_eval/api/model.py b/lm_eval/api/model.py index b5c2999336471107dc7e4aac7cce6b725e33506e..5a03bcbde9c819c4732930f7ac42e67b317601df 100644 --- a/lm_eval/api/model.py +++ b/lm_eval/api/model.py @@ -113,13 +113,17 @@ class LM(abc.ABC): """ pass - def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str: + def apply_chat_template( + self, chat_history: List[Dict[str, str]], add_generation_prompt=True + ) -> str: """ Defines how to transform few-shot examples provided as chat history into a format that can be used as input to the LM. :param chat_history: list[dict[str, str]] A list of dictionaries with keys 'role' and 'content'. Values are strings representing the role name and the content of the message, respectively. + :param add_generation_prompt: bool + Whether to append an assistant gen prefix (for e.g. <|assistant|>) to the assistant messages in the chat history. False if prefilling an assistant message. :return: str A string representing the chat history in a format that can be used as input to the LM. """ diff --git a/lm_eval/api/registry.py b/lm_eval/api/registry.py index f8f289371e2cbf43850f43ce0a1fd32c5bdffeff..6d16639e8b80668702c6437554acf289edd3ab26 100644 --- a/lm_eval/api/registry.py +++ b/lm_eval/api/registry.py @@ -17,13 +17,13 @@ def register_model(*names): def decorate(cls): for name in names: - assert issubclass( - cls, LM - ), f"Model '{name}' ({cls.__name__}) must extend LM class" + assert issubclass(cls, LM), ( + f"Model '{name}' ({cls.__name__}) must extend LM class" + ) - assert ( - name not in MODEL_REGISTRY - ), f"Model named '{name}' conflicts with existing model! Please register with a non-conflicting alias instead." + assert name not in MODEL_REGISTRY, ( + f"Model named '{name}' conflicts with existing model! Please register with a non-conflicting alias instead." 
+ ) MODEL_REGISTRY[name] = cls return cls @@ -48,9 +48,9 @@ func2task_index = {} def register_task(name): def decorate(fn): - assert ( - name not in TASK_REGISTRY - ), f"task named '{name}' conflicts with existing registered task!" + assert name not in TASK_REGISTRY, ( + f"task named '{name}' conflicts with existing registered task!" + ) TASK_REGISTRY[name] = fn ALL_TASKS.add(name) @@ -104,9 +104,9 @@ def register_metric(**args): ]: if key in args: value = args[key] - assert ( - value not in registry - ), f"{key} named '{value}' conflicts with existing registered {key}!" + assert value not in registry, ( + f"{key} named '{value}' conflicts with existing registered {key}!" + ) if key == "metric": registry[name] = fn @@ -140,9 +140,9 @@ def get_metric(name: str, hf_evaluate_metric=False) -> Callable: def register_aggregation(name: str): def decorate(fn): - assert ( - name not in AGGREGATION_REGISTRY - ), f"aggregation named '{name}' conflicts with existing registered aggregation!" + assert name not in AGGREGATION_REGISTRY, ( + f"aggregation named '{name}' conflicts with existing registered aggregation!" 
+ ) AGGREGATION_REGISTRY[name] = fn return fn diff --git a/lm_eval/api/samplers.py b/lm_eval/api/samplers.py index 2cdc4e43e7f73065b1df554f729d7bd92c4398b5..3f81dfc66ee8f683b32f0ef8eb78876c9cd67aef 100644 --- a/lm_eval/api/samplers.py +++ b/lm_eval/api/samplers.py @@ -1,10 +1,23 @@ from functools import partial +from typing import TYPE_CHECKING, Iterable, Optional, Union import datasets +if TYPE_CHECKING: + from random import Random + + from lm_eval.api.task import ConfigurableTask, Task + + class ContextSampler: - def __init__(self, docs, task, fewshot_indices=None, rnd=None) -> None: + def __init__( + self, + docs: list[dict], + task: Union["Task", "ConfigurableTask"], + fewshot_indices: Optional[Iterable] = None, + rnd: Optional["Random"] = None, + ) -> None: self.rnd = rnd if not self.rnd: raise ValueError( @@ -58,8 +71,9 @@ class ContextSampler: ) self.docs = self.docs.select(fewshot_indices) - def get_context(self, doc, num_fewshot): + def get_context(self, doc: dict, num_fewshot: int, gen_prefix: str = None): # draw an extra fewshot sample if using same split as evaluating on + prefix = gen_prefix + " " if gen_prefix else "" n_samples = ( num_fewshot + 1 if self.config.fewshot_split == self.config.test_split @@ -77,14 +91,14 @@ class ContextSampler: for doc in selected_docs: doc_content = self.doc_to_text(doc) doc_target = self.doc_to_target(doc) - labeled_examples += ( - doc_content - if self.config.doc_to_choice is None or isinstance(doc_content, str) - else self.doc_to_choice(doc)[doc_content] - ) + if self.config.doc_to_choice is None or isinstance(doc_content, str): + labeled_examples += doc_content + else: + labeled_examples += self.doc_to_choice(doc)[doc_content] if doc_target != "": labeled_examples += self.target_delimiter + labeled_examples += prefix labeled_examples += ( str(doc_target[0]) if isinstance(doc_target, list) @@ -98,10 +112,13 @@ class ContextSampler: def get_chat_context( self, - doc, - num_fewshot, + doc: dict, + num_fewshot: int, 
fewshot_as_multiturn: bool = False, + gen_prefix: Optional[str] = None, ): + # TODO: Do we need any other delimiter + prefix = gen_prefix + " " if gen_prefix else "" chat_history = [] # draw an extra fewshot sample if using same split as evaluating on n_samples = ( @@ -132,23 +149,28 @@ class ContextSampler: chat_history.append( { "role": "assistant", - "content": str(doc_target[0]) + "content": prefix + str(doc_target[0]) if isinstance(doc_target, list) - else doc_target + else prefix + doc_target if self.config.doc_to_choice is None or isinstance(doc_target, str) - else str(self.doc_to_choice(doc)[doc_target]), + else prefix + str(self.doc_to_choice(doc)[doc_target]), } ) else: # get fewshot context as one user turn chat_history.append( - {"role": "user", "content": self.get_context(doc, num_fewshot)} + { + "role": "user", + "content": self.get_context( + doc, num_fewshot, gen_prefix=gen_prefix + ), + } ) return chat_history - def sample(self, n): + def sample(self, n: int): """ Draw `n` samples from our fewshot docs. This method should be overridden by subclasses. """ @@ -157,19 +179,19 @@ class ContextSampler: class FirstNSampler(ContextSampler): - def sample(self, n) -> None: + def sample(self, n: int) -> None: """ Draw the first `n` samples in order from the specified split. Used for tasks with "canonical" ordered fewshot examples, such as MMLU and CMMLU. """ - assert ( - n <= len(self.docs) - ), f"Error: number of fewshot samples requested exceeds the {len(self.docs)} that are available." + assert n <= len(self.docs), ( + f"Error: number of fewshot samples requested exceeds the {len(self.docs)} that are available." + ) return self.docs[:n] class BalancedSampler(ContextSampler): - def sample(self, n) -> None: + def sample(self, n: int) -> None: """ TODO: this should return approximately class-balanced samples from our fewshot examples. TODO: what order should they be in? maybe random? 
@@ -179,7 +201,7 @@ class BalancedSampler(ContextSampler): class ManualSampler(ContextSampler): - def sample(self, n) -> None: + def sample(self, n: int) -> None: """ """ pass @@ -190,7 +212,7 @@ SAMPLER_REGISTRY = { } -def get_sampler(name): +def get_sampler(name: str): try: return SAMPLER_REGISTRY[name] except KeyError: diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index 555cb4330d9795ce920a0f1f220750be4d086c9d..f14f36e818e95016bf0355dd91c8688e27d45cbf 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -75,6 +75,7 @@ class TaskConfig(dict): doc_to_text: Optional[Union[Callable, str]] = None doc_to_target: Optional[Union[Callable, str]] = None doc_to_image: Union[Callable, str] = None + unsafe_code: bool = False doc_to_choice: Optional[Union[Callable, str, dict, list]] = None process_results: Optional[Union[Callable, str]] = None use_prompt: Optional[str] = None @@ -92,6 +93,7 @@ class TaskConfig(dict): filter_list: Optional[Union[str, list]] = None should_decontaminate: bool = False doc_to_decontamination_query: Optional[str] = None + gen_prefix: Optional[str] = None metadata: Optional[dict] = ( None # by default, not used in the code. 
allows for users to pass arbitrary info to tasks ) @@ -369,6 +371,9 @@ class Task(abc.ABC): def doc_to_image(self, doc): raise NotImplementedError + def doc_to_prefix(self, doc): + return "" + def build_all_requests( self, *, @@ -398,7 +403,7 @@ class Task(abc.ABC): ) cache_key += f"-tokenizer{tokenizer_name}" - cached_instances = load_from_cache(file_name=cache_key) + cached_instances = load_from_cache(file_name=cache_key, cache=cache_requests) if cache_requests and cached_instances and not rewrite_requests_cache: cached_instances = cached_instances[:limit] @@ -442,6 +447,7 @@ class Task(abc.ABC): apply_chat_template, fewshot_as_multiturn, chat_template, + gen_prefix=self.doc_to_prefix(doc), ) # TODO: we should override self.config.repeats if doing greedy gen so users don't waste time+compute @@ -541,13 +547,7 @@ class Task(abc.ABC): return len(re.split(r"\s+", doc)) @utils.positional_deprecated - def fewshot_context( - self, - doc, - num_fewshot, - rnd=None, - description=None, - ): + def fewshot_context(self, doc, num_fewshot, rnd=None, description=None, **kwargs): """Returns a fewshot context string that is made up of a prepended description (if provided), the `num_fewshot` number of examples, and an appended prompt example. @@ -732,6 +732,9 @@ class ConfigurableTask(Task): # mark the task as requiring multimodality. self.MULTIMODAL = True + if self.config.unsafe_code is not False: + self.UNSAFE_CODE = True + if self.config.dataset_path is not None: self.DATASET_PATH = self.config.dataset_path @@ -1000,6 +1003,7 @@ class ConfigurableTask(Task): labeled_examples: List[Dict[str, str]], question: str, fewshot_as_multiturn: bool = False, + gen_prefix: Optional[str] = None, ) -> None: """Adds a target question to the labeled examples list. If fewshot_as_multiturn is True, or labeled_examples is empty, or the last entry is a system turn, appends the question as a new user entry. 
@@ -1015,17 +1019,20 @@ class ConfigurableTask(Task): else: # if fewshot_as_multiturn is True, append as next user entry (last is always assistant) labeled_examples.append({"role": "user", "content": question}) + if gen_prefix: + labeled_examples.append({"role": "assistant", "content": gen_prefix}) @utils.positional_deprecated def fewshot_context( self, - doc: str, + doc: dict, num_fewshot: int, system_instruction: Optional[str] = None, apply_chat_template: bool = False, fewshot_as_multiturn: bool = False, chat_template: Optional[Callable] = None, - ) -> str: + gen_prefix: Optional[str] = None, + ) -> Union[str, List[str]]: """Returns a fewshot context string that is made up of a prepended description (if provided), the `num_fewshot` number of examples, and an appended prompt example. @@ -1044,7 +1051,6 @@ class ConfigurableTask(Task): :returns: str The fewshot context. """ - if apply_chat_template: labeled_examples = [] else: @@ -1072,25 +1078,33 @@ class ConfigurableTask(Task): labeled_examples.append({"role": "system", "content": system_prompt}) else: labeled_examples = system_prompt - # if few-shot - append examples after the system prompt if num_fewshot > 0: if apply_chat_template: labeled_examples.extend( self.sampler.get_chat_context( - doc, num_fewshot, fewshot_as_multiturn + doc, + num_fewshot, + fewshot_as_multiturn, + gen_prefix=gen_prefix, ) ) else: - labeled_examples += self.sampler.get_context(doc, num_fewshot) + labeled_examples += self.sampler.get_context( + doc, num_fewshot, gen_prefix=gen_prefix + ) example = self.doc_to_text(doc) if apply_chat_template: if self.multiple_input: + # TODO: append prefill? 
return chat_template(labeled_examples) if isinstance(example, str): self.append_target_question( - labeled_examples, example, fewshot_as_multiturn + labeled_examples, + example, + fewshot_as_multiturn, + gen_prefix=gen_prefix, ) # for loglikelihood create a list of questions with appended choices elif isinstance(example, list): @@ -1098,37 +1112,62 @@ class ConfigurableTask(Task): # copy chat history for each example and append the answer for ex in example: chat = deepcopy(labeled_examples) - self.append_target_question(chat, ex, fewshot_as_multiturn) - labeled_examples_list.append(chat_template(chat)) + self.append_target_question( + chat, + ex, + fewshot_as_multiturn, + gen_prefix=gen_prefix, + ) + # TODO: append prefill? + labeled_examples_list.append( + chat_template( + chat, + add_generation_prompt=False if gen_prefix else True, + ) + ) return labeled_examples_list # if example is an integer, append the choice or convert to string elif isinstance(example, int): if self.config.doc_to_choice is not None: choices = self.doc_to_choice(doc) self.append_target_question( - labeled_examples, choices[example], fewshot_as_multiturn + labeled_examples, + choices[example], + fewshot_as_multiturn, + gen_prefix=gen_prefix, ) else: self.append_target_question( - labeled_examples, str(example), fewshot_as_multiturn + labeled_examples, + str(example), + fewshot_as_multiturn, + gen_prefix=gen_prefix, ) # return lm.apply_chat_template(labeled_examples) - return chat_template(labeled_examples) + return chat_template( + labeled_examples, + add_generation_prompt=False if gen_prefix else True, + ) else: + prefix = ( + self.config.target_delimiter + gen_prefix + if gen_prefix is not None + else "" + ) if self.multiple_input: return labeled_examples if isinstance(example, str): - return labeled_examples + example + return labeled_examples + example + prefix elif isinstance(example, list): - return [labeled_examples + ex for ex in example] + return [labeled_examples + ex + prefix for 
ex in example] elif isinstance(example, int): if self.config.doc_to_choice is not None: choices = self.doc_to_choice(doc) - return labeled_examples + choices[example] + return labeled_examples + choices[example] + prefix else: - return labeled_examples + str(example) + return labeled_examples + str(example) + prefix - def apply_filters(self): + def apply_filters(self) -> Optional[List[Instance]]: """Iterates over FilterEnsembles and applies them to instances""" if hasattr(self, "_filters"): for f in self._filters: @@ -1140,7 +1179,7 @@ class ConfigurableTask(Task): def should_decontaminate(self): return self.config.should_decontaminate - def doc_to_decontamination_query(self, doc): + def doc_to_decontamination_query(self, doc: dict): if self.config.should_decontaminate: if self.config.doc_to_decontamination_query is None: return self.doc_to_text(doc) @@ -1299,6 +1338,14 @@ class ConfigurableTask(Task): else: return None + def doc_to_prefix(self, doc): + if (gen_prefix := self.config.gen_prefix) is not None: + if gen_prefix in self.features: + return doc[gen_prefix] + else: + return utils.apply_template(gen_prefix, doc) + return None + def construct_requests( self, doc: dict, ctx: str, **kwargs ) -> Union[List[Instance], Instance]: @@ -1503,9 +1550,9 @@ class ConfigurableTask(Task): # we expect multiple_targets to be a list. elif self.multiple_target: gold = list(gold) - elif ( - type(gold) is not type(result) - and "bypass" not in self._metric_fn_list.keys() + # TODO: handle this better + elif type(gold) is not type(result) and not ( + "bypass" in self._metric_fn_list.keys() or isinstance(result, list) ): # cast gold to the same type as result gold = type(result)(gold) @@ -1561,7 +1608,10 @@ class ConfigurableTask(Task): result_score = self._metric_fn_list[metric]([gold, result]) if isinstance(result_score, dict): # TODO: this handles the case where HF evaluate returns a dict. 
- result_score = result_score[metric] + # This allows for multiple metrics to be returned from the same function + for k, v in result_score.items(): + result_dict[k] = v + return result_dict result_dict[metric] = result_score else: raise ValueError( diff --git a/lm_eval/caching/cache.py b/lm_eval/caching/cache.py index 63691435215a05894d206f3f8218ab23c5d2e250..4bff225fc0dbddbe05df1c7157a63690a0ca663a 100644 --- a/lm_eval/caching/cache.py +++ b/lm_eval/caching/cache.py @@ -21,7 +21,9 @@ HASH_PREFIX = hashlib.sha256(HASH_INPUT.encode("utf-8")).hexdigest() FILE_SUFFIX = f".{HASH_PREFIX}.pickle" -def load_from_cache(file_name): +def load_from_cache(file_name: str, cache: bool = False): + if not cache: + return try: path = f"{PATH}/{file_name}{FILE_SUFFIX}" diff --git a/lm_eval/decontamination/archiver.py b/lm_eval/decontamination/archiver.py index fa8a715f78e4cccef9f930e5cf448c4481730c2d..c132232116c2ae5f5ab1dc3a2a0afc0dbd4ef1bd 100644 --- a/lm_eval/decontamination/archiver.py +++ b/lm_eval/decontamination/archiver.py @@ -110,12 +110,15 @@ class TextReader: def read_tqdm(self, update_frequency: int = 10000): current_file_position = 0 line_counter = 0 - with open(self.file_path, "r", encoding="utf-8") as fh, tqdm.tqdm( - total=os.path.getsize(self.file_path), - dynamic_ncols=True, - unit="byte", - unit_scale=1, - ) as progress: + with ( + open(self.file_path, "r", encoding="utf-8") as fh, + tqdm.tqdm( + total=os.path.getsize(self.file_path), + dynamic_ncols=True, + unit="byte", + unit_scale=1, + ) as progress, + ): with mmap.mmap(fh.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj: for line in iter(mmap_obj.readline, b""): line = line.decode("utf-8") diff --git a/lm_eval/decontamination/decontaminate.py b/lm_eval/decontamination/decontaminate.py index 3874eb58be99aebd2736aeede76c13145231434f..2d1250d39bf7cd0272e412452d970ec7c52992c5 100644 --- a/lm_eval/decontamination/decontaminate.py +++ b/lm_eval/decontamination/decontaminate.py @@ -151,7 +151,7 @@ def 
get_train_overlap(docs_by_task_set: dict, ngrams_path: str, limit: int) -> d elapsed = time.perf_counter() - start print(f"Read took {elapsed:0.5f} seconds.") - print(f"Speed: {(os.path.getsize(file)/1000000.0)/elapsed}MB/second") + print(f"Speed: {(os.path.getsize(file) / 1000000.0) / elapsed}MB/second") print(duplicates) diff --git a/lm_eval/evaluator.py b/lm_eval/evaluator.py index e7dd3043cbd26b4ff30cc8a178cb7d27d3f422d2..efa0f9191b638bba4a47008aa73350d96e4dd19b 100644 --- a/lm_eval/evaluator.py +++ b/lm_eval/evaluator.py @@ -74,6 +74,7 @@ def simple_evaluate( numpy_random_seed: int = 1234, torch_random_seed: int = 1234, fewshot_random_seed: int = 1234, + confirm_run_unsafe_code: bool = False, ): """Instantiate and evaluate a model on a list of tasks. @@ -313,6 +314,7 @@ def simple_evaluate( apply_chat_template=apply_chat_template, fewshot_as_multiturn=fewshot_as_multiturn, verbosity=verbosity, + confirm_run_unsafe_code=confirm_run_unsafe_code, ) if lm.rank == 0: @@ -372,6 +374,7 @@ def evaluate( apply_chat_template: Union[bool, str] = False, fewshot_as_multiturn: bool = False, verbosity: str = "INFO", + confirm_run_unsafe_code: bool = False, ): """Instantiate and evaluate a model on a list of tasks. @@ -381,6 +384,10 @@ def evaluate( Dictionary of tasks. Tasks will be taken to have name type(task).config.task . :param limit: int, optional Limit the number of examples per task (only use this for testing) + :param cache_requests: bool, optional + Speed up evaluation by caching the building of dataset requests. + :param rewrite_requests_cache: bool, optional + Rewrites all the request cache if set to `True`. :param bootstrap_iters: Number of iterations for bootstrap statistics, used when calculating stderr. Set to 0 for skipping all stderr calculations. :param write_out: bool @@ -396,6 +403,10 @@ def evaluate( Defaults to False (no chat template applied). 
:param fewshot_as_multiturn: bool Whether to provide the fewshot examples as a multiturn conversation or a single user turn. + :param verbosity: str + Verbosity level for logging + :param confirm_run_unsafe_code: bool + Whether to confirm running tasks marked as unsafe. :return Dictionary of results """ @@ -422,13 +433,19 @@ def evaluate( ): raise ValueError("log_samples must be True for 'bypass' metric-only tasks") - # validation check: are we running multimodal task <-> non-multimodal model class, or vice-versa. + # validation checks: + # 1.are we running multimodal task <-> non-multimodal model class, or vice-versa. + # 2.are we running code that is marked as unsafe. incompatible_tasks = [] for task_output in eval_tasks: task: Task = task_output.task if getattr(lm, "MULTIMODAL", False) != getattr(task, "MULTIMODAL", False): incompatible_tasks.append(task_output.task_name) + elif getattr(task, "UNSAFE_CODE", False) and not confirm_run_unsafe_code: + raise ValueError( + f"Attempted to run task: {task_output.task_name} which is marked as unsafe. Set confirm_run_unsafe_code=True to run this task." + ) if len(incompatible_tasks) > 0: if not getattr(lm, "MULTIMODAL", False): raise ValueError( @@ -438,7 +455,7 @@ def evaluate( raise ValueError( f"Attempted to run tasks: {incompatible_tasks} which are text-only, but used a model type which only currently supports multimodal tasks." ) - # end multimodality validation check + # end validation check # Cache the limit arg. 
limit_arg = limit diff --git a/lm_eval/evaluator_utils.py b/lm_eval/evaluator_utils.py index d5a08326014279335521dcd1f5f70c1fe12c5003..5949f75732b5e3d5eba5bf47ab26cb75b4c3558c 100644 --- a/lm_eval/evaluator_utils.py +++ b/lm_eval/evaluator_utils.py @@ -7,6 +7,7 @@ from typing import List, Optional, Tuple, Union from lm_eval.api.group import ConfigurableGroup from lm_eval.api.metrics import ( aggregate_subtask_metrics, + mean, pooled_sample_stderr, stderr_for_metric, ) @@ -99,7 +100,12 @@ class TaskOutput: def calculate_aggregate_metric(self, bootstrap_iters=100000) -> None: for (metric, filter_key), items in self.sample_metrics.items(): - agg_fn = self.task.aggregation()[metric] + try: + agg_fn = self.task.aggregation()[metric] + except KeyError: + # This is when process results output an arbitrary metric + # TODO: Handle this better and allow other aggregate functions other than mean. + agg_fn = mean metric_key = f"{metric},{filter_key}" self.agg_metrics[metric_key] = agg_fn(items) self.sample_len = len(items) # TODO: same sample size for each metric? diff --git a/lm_eval/filters/__init__.py b/lm_eval/filters/__init__.py index 46fa4acd4cc4f06d1f62f25840b3c4d9ffc92b7e..be5c9d43624ea901cc578c65689be5bd263209a5 100644 --- a/lm_eval/filters/__init__.py +++ b/lm_eval/filters/__init__.py @@ -4,7 +4,7 @@ from typing import List from lm_eval.api.filter import FilterEnsemble from lm_eval.api.registry import get_filter -from . import extraction, selection, transformation +from . 
import custom, extraction, selection, transformation def build_filter_ensemble( diff --git a/lm_eval/filters/custom.py b/lm_eval/filters/custom.py new file mode 100644 index 0000000000000000000000000000000000000000..ab22c51eda74670aaea6699fc68992994c41932d --- /dev/null +++ b/lm_eval/filters/custom.py @@ -0,0 +1,17 @@ +from lm_eval.api.filter import Filter +from lm_eval.api.registry import register_filter + + +@register_filter("custom") +class CustomFilter(Filter): + """ + Custom filter that applies a custom, user-defined function to the model responses. + """ + + def __init__(self, **kwargs) -> None: + self.filter_fn = kwargs.pop("filter_fn") + + super().__init__(**kwargs) + + def apply(self, resps, docs): + return self.filter_fn(resps, docs) diff --git a/lm_eval/filters/extraction.py b/lm_eval/filters/extraction.py index 5dc10863e7198b2275eb8f31460ce87b14ce45f0..9c8d796b6099d89fb6b6e5b2e17444cfa66f1b06 100644 --- a/lm_eval/filters/extraction.py +++ b/lm_eval/filters/extraction.py @@ -8,12 +8,17 @@ from lm_eval.api.registry import register_filter @register_filter("regex") class RegexFilter(Filter): - """ """ + """A filter that extracts values from text using regex pattern matching. + + This filter applies a regex pattern to each model response and extracts matched values. + If no match is found, returns a fallback value. Useful for extracting structured data + (like numbers) from unstructured model outputs. + """ def __init__( self, regex_pattern: str = r"#### (\-?[0-9\.\,]+)", - group_select=0, + group_select: int = 0, fallback: str = "[invalid]", ) -> None: """ @@ -25,7 +30,7 @@ class RegexFilter(Filter): self.group_select = group_select self.fallback = fallback - def apply(self, resps, docs): + def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list[str]]: # here, we assume we have a list, in which each element is # a list of model responses for some particular input/target pair. 
# so we process each of these (same input/target response sets) @@ -55,12 +60,9 @@ class RegexFilter(Filter): @register_filter("remove_whitespace") class WhitespaceFilter(Filter): - """ """ - - def __init__(self) -> None: - pass + """Filters out leading whitespace from responses.""" - def apply(self, resps, docs): + def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list[str]]: def filter_set(inst): filtered_resp = [] for resp in inst: @@ -105,7 +107,7 @@ class MultiChoiceRegexFilter(RegexFilter): self.ignore_punctuation = ignore_punctuation self.regexes_to_ignore = regexes_to_ignore - def apply(self, resps, docs): + def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list[str]]: # here, we assume we have a list, in which each element is # a list of model responses for some particular input/target pair. # so we process each of these (same input/target response sets) @@ -164,7 +166,7 @@ class MultiChoiceRegexFilter(RegexFilter): fallback_regex = re.compile("|".join(fallback_regexes)) without_paren_fallback_regex = "|".join(without_paren_fallback_regexes) without_paren_fallback_regex = re.compile( - f":[\s]*({without_paren_fallback_regex})" + rf":[\s]*({without_paren_fallback_regex})" ) filtered = [] diff --git a/lm_eval/filters/selection.py b/lm_eval/filters/selection.py index 6e368b5980626c8008ed48c45a360046660db13e..8c670ed74d00655441cc45181fba1265f0db5290 100644 --- a/lm_eval/filters/selection.py +++ b/lm_eval/filters/selection.py @@ -34,9 +34,9 @@ class TakeKFilter(Filter): # need resp to be subscriptable to check below resps = list(resps) # check we have at least k responses per doc, else we can't take the first k - assert ( - len(resps[0]) >= self.k - ), f"Need at least {self.k} responses per doc to take first {self.k}, but got {len(resps[0])} only! Please increase TaskConfig.repeats ." + assert len(resps[0]) >= self.k, ( + f"Need at least {self.k} responses per doc to take first {self.k}, but got {len(resps[0])} only! 
Please increase TaskConfig.repeats ." + ) return map(lambda r: r[: self.k], resps) diff --git a/lm_eval/filters/transformation.py b/lm_eval/filters/transformation.py index cac1c5921dafe74be0b8416bd3a0678dc1fa1570..1a3592b6dd4811dcef39ff090dfa42e926613b5c 100644 --- a/lm_eval/filters/transformation.py +++ b/lm_eval/filters/transformation.py @@ -43,9 +43,9 @@ class MapFilter(Filter): """ if mapping_dict is None: mapping_dict = {} - assert isinstance( - mapping_dict, dict - ), "Provided mapping_dict is not a dictionary" + assert isinstance(mapping_dict, dict), ( + "Provided mapping_dict is not a dictionary" + ) self.mapping_dict = mapping_dict self.default_value = default_value diff --git a/lm_eval/loggers/evaluation_tracker.py b/lm_eval/loggers/evaluation_tracker.py index 067b047b599fac2a0045f3a32e42b6ecec0afcaf..4067c50e8e2ad4a4c991e1b8acdcdba4fe78f338 100644 --- a/lm_eval/loggers/evaluation_tracker.py +++ b/lm_eval/loggers/evaluation_tracker.py @@ -488,7 +488,7 @@ class EvaluationTracker: else: dataset_summary += f"{self.general_config_tracker.model_name}\n" dataset_summary += ( - f"The dataset is composed of {len(card_metadata)-1} configuration(s), each one corresponding to one of the evaluated task.\n\n" + f"The dataset is composed of {len(card_metadata) - 1} configuration(s), each one corresponding to one of the evaluated task.\n\n" f"The dataset has been created from {len(results_files)} run(s). 
Each run can be found as a specific split in each " 'configuration, the split being named using the timestamp of the run.The "train" split is always pointing to the latest results.\n\n' 'An additional configuration "results" store all the aggregated results of the run.\n\n' @@ -501,7 +501,7 @@ class EvaluationTracker: ) dataset_summary += ( "## Latest results\n\n" - f'These are the [latest results from run {latest_datetime}]({last_results_file_path.replace("/resolve/", "/blob/")}) ' + f"These are the [latest results from run {latest_datetime}]({last_results_file_path.replace('/resolve/', '/blob/')}) " "(note that there might be results for other tasks in the repos if successive evals didn't cover the same tasks. " 'You find each in the results and the "latest" split for each eval):\n\n' f"```python\n{results_string}\n```" diff --git a/lm_eval/loggers/wandb_logger.py b/lm_eval/loggers/wandb_logger.py index 4bcc439ed84749e7dc165acceee5060ed0f4844a..53a886fcfcdc65604cd99ccaa15a24579a78bb8f 100644 --- a/lm_eval/loggers/wandb_logger.py +++ b/lm_eval/loggers/wandb_logger.py @@ -48,6 +48,9 @@ class WandbLogger: self.wandb_args: Dict[str, Any] = kwargs + # pop the step key from the args to save for all logging calls + self.step = self.wandb_args.pop("step", None) + # initialize a W&B run if wandb.run is None: self.run = wandb.init(**self.wandb_args) @@ -152,11 +155,11 @@ class WandbLogger: # log the complete eval result to W&B Table table = make_table(["Tasks"] + columns, "results") - self.run.log({"evaluation/eval_results": table}) + self.run.log({"evaluation/eval_results": table}, step=self.step) if "groups" in self.results.keys(): table = make_table(["Groups"] + columns, "groups") - self.run.log({"evaluation/group_eval_results": table}) + self.run.log({"evaluation/group_eval_results": table}, step=self.step) def _log_results_as_artifact(self) -> None: """Log results as JSON artifact to W&B.""" @@ -174,13 +177,13 @@ class WandbLogger: """Log evaluation results to W&B.""" 
# Log configs to wandb configs = self._get_config() - self.run.config.update(configs) + self.run.config.update(configs, allow_val_change=self.step is not None) wandb_summary, self.wandb_results = self._sanitize_results_dict() # update wandb.run.summary with items that were removed self.run.summary.update(wandb_summary) # Log the evaluation metrics to wandb - self.run.log(self.wandb_results) + self.run.log(self.wandb_results, step=self.step) # Log the evaluation metrics as W&B Table self._log_results_as_table() # Log the results dict as json to W&B Artifacts @@ -222,7 +225,7 @@ class WandbLogger: instance = [x["arguments"][0][0] for x in data] labels = [x["arguments"][0][1] for x in data] resps = [ - f'log probability of continuation is {x["resps"][0][0][0]} ' + f"log probability of continuation is {x['resps'][0][0][0]} " + "\n\n" + "continuation will {} generated with greedy sampling".format( "not be" if not x["resps"][0][0][1] else "be" @@ -230,7 +233,7 @@ class WandbLogger: for x in data ] filtered_resps = [ - f'log probability of continuation is {x["filtered_resps"][0][0]} ' + f"log probability of continuation is {x['filtered_resps'][0][0]} " + "\n\n" + "continuation will {} generated with greedy sampling".format( "not be" if not x["filtered_resps"][0][1] else "be" @@ -329,7 +332,7 @@ class WandbLogger: # log the samples as a W&B Table df = self._generate_dataset(eval_preds, self.task_configs.get(task_name)) - self.run.log({f"{task_name}_eval_results": df}) + self.run.log({f"{task_name}_eval_results": df}, step=self.step) # log the samples as a json file as W&B Artifact self._log_samples_as_artifact(eval_preds, task_name) @@ -348,4 +351,4 @@ class WandbLogger: # log the samples as a json file as W&B Artifact self._log_samples_as_artifact(eval_preds, task_name) - self.run.log({f"{group}_eval_results": grouped_df}) + self.run.log({f"{group}_eval_results": grouped_df}, step=self.step) diff --git a/lm_eval/models/__init__.py b/lm_eval/models/__init__.py index 
cde586ec9fbaaf37826a1925e1e105549fb554ff..39412bb1bbf2492bb318d3b8e38a6b12ca17b7b5 100644 --- a/lm_eval/models/__init__.py +++ b/lm_eval/models/__init__.py @@ -11,6 +11,7 @@ from . import ( neuralmagic, neuron_optimum, openai_completions, + optimum_ipex, optimum_lm, textsynth, vllm_causallms, diff --git a/lm_eval/models/api_models.py b/lm_eval/models/api_models.py index 24bf1a8aeb7180c1b483408bf75be2f59c53fbda..c24cea95bba66fa9338bb90395cd65c645337b59 100644 --- a/lm_eval/models/api_models.py +++ b/lm_eval/models/api_models.py @@ -195,9 +195,9 @@ class TemplateAPI(TemplateLM): """Helper method to transform the prompt into the expected API input format. messages consist of batched requests""" if isinstance(messages[0], JsonChatStr): # for chat completions we need to decode the json string to list[dict,...] - assert ( - self._batch_size == 1 - ), "non-tokenized chat requests are only supported with batch_size=1" + assert self._batch_size == 1, ( + "non-tokenized chat requests are only supported with batch_size=1" + ) # list[dict["role":..., "content":...],...] return json.loads(messages[0].prompt) @@ -253,12 +253,15 @@ class TemplateAPI(TemplateLM): return "" def apply_chat_template( - self, chat_history: List[Dict[str, str]] + self, chat_history: List[Dict[str, str]], add_generation_prompt: bool = True ) -> Union[str, JsonChatStr]: """Applies a chat template to a list of chat history between user and model.""" if self.tokenizer_backend == "huggingface" and self.tokenized_requests: return self.tokenizer.apply_chat_template( - chat_history, tokenize=False, add_generation_prompt=True + chat_history, + tokenize=False, + add_generation_prompt=add_generation_prompt, + continue_final_message=not add_generation_prompt, ) else: # bit of a hack. 
We'll load back before sending to the API @@ -503,9 +506,9 @@ class TemplateAPI(TemplateLM): return await tqdm_asyncio.gather(*tasks, desc="Requesting API") def _loglikelihood_tokens(self, requests, **kwargs) -> List[Tuple[float, bool]]: - assert ( - self.tokenizer is not None - ), "Tokenizer is required for loglikelihood tasks to compute context lengths." + assert self.tokenizer is not None, ( + "Tokenizer is required for loglikelihood tasks to compute context lengths." + ) res = [] def _collate(req: LogLikelihoodInputs): diff --git a/lm_eval/models/hf_vlms.py b/lm_eval/models/hf_vlms.py index a4fad6321f491b080e50cd7e371366ba44ca3593..4e67debe17b227889225138d4667f85bda4e0eea 100644 --- a/lm_eval/models/hf_vlms.py +++ b/lm_eval/models/hf_vlms.py @@ -51,9 +51,9 @@ class HFMultimodalLM(HFLM): # modify init behavior. super().__init__(pretrained, **kwargs) - assert ( - self.batch_size != "auto" - ), "Batch size 'auto' is not yet supported for hf-multimodal models." + assert self.batch_size != "auto", ( + "Batch size 'auto' is not yet supported for hf-multimodal models." + ) self.chat_applied: bool = False # TODO: phi-3.5 "image placeholders" are , , ... in order. how to handle this case @@ -73,9 +73,9 @@ class HFMultimodalLM(HFLM): or getattr(self.config, "image_token_index", None) ) ) - assert ( - self.image_token_id is not None - ), "Must have a non-None image_token_id to evaluate a Hugging Face AutoModelForVision2Seq model. Please pass `image_token_id` in `--model_args` if model's config does not already specify one." + assert self.image_token_id is not None, ( + "Must have a non-None image_token_id to evaluate a Hugging Face AutoModelForVision2Seq model. Please pass `image_token_id` in `--model_args` if model's config does not already specify one." 
+ ) # get the string this token ID corresponds to self.image_token = self.tok_decode( [self.image_token_id], skip_special_tokens=False @@ -200,7 +200,9 @@ class HFMultimodalLM(HFLM): return context_enc, continuation_enc, image_enc - def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str: + def apply_chat_template( + self, chat_history: List[Dict[str, str]], add_generation_prompt: bool = True + ) -> str: self.chat_applied = True if not self.interleave: for content in chat_history: @@ -250,7 +252,9 @@ class HFMultimodalLM(HFLM): ) return self.processor.apply_chat_template( - chat_history, add_generation_prompt=True + chat_history, + add_generation_prompt=add_generation_prompt, + continue_final_message=not add_generation_prompt, ) def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]: diff --git a/lm_eval/models/huggingface.py b/lm_eval/models/huggingface.py index 0a5fa2ed8548b54f9a215bd5cd935e350360f1a0..919d505aa566a9cda55468f2e27b2265487b4c37 100644 --- a/lm_eval/models/huggingface.py +++ b/lm_eval/models/huggingface.py @@ -90,6 +90,7 @@ class HFLM(TemplateLM): delta: Optional[str] = None, autogptq: Optional[Union[bool, str]] = False, gptqmodel: Optional[bool] = False, + gguf_file: Optional[str] = None, **kwargs, ) -> None: super().__init__() @@ -98,7 +99,9 @@ class HFLM(TemplateLM): eval_logger.warning( "`pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way." 
) - assert not parallelize, "`parallelize=True` is not compatible with passing pre-initialized model to `pretrained`" + assert not parallelize, ( + "`parallelize=True` is not compatible with passing pre-initialized model to `pretrained`" + ) self._model = pretrained self._device = self._model.device self._config = self._model.config @@ -164,6 +167,7 @@ class HFLM(TemplateLM): pretrained, revision=revision, trust_remote_code=trust_remote_code, + gguf_file=gguf_file, ) # determine which of 'causal' and 'seq2seq' backends to use for HF models @@ -178,6 +182,7 @@ class HFLM(TemplateLM): revision=revision, trust_remote_code=trust_remote_code, use_fast_tokenizer=use_fast_tokenizer, + gguf_file=gguf_file, ) # if we passed `pretrained` as a string, initialize our model now @@ -196,6 +201,7 @@ class HFLM(TemplateLM): delta=delta, autogptq=autogptq, gptqmodel=gptqmodel, + gguf_file=gguf_file, **kwargs, ) @@ -508,12 +514,14 @@ class HFLM(TemplateLM): pretrained: str, revision: str = "main", trust_remote_code: bool = False, + gguf_file: Optional[str] = None, ) -> None: """Return the model config for HuggingFace models""" self._config = transformers.AutoConfig.from_pretrained( pretrained, revision=revision, trust_remote_code=trust_remote_code, + gguf_file=gguf_file, ) def _create_model( @@ -535,6 +543,7 @@ class HFLM(TemplateLM): delta: Optional[str] = None, autogptq: Optional[Union[bool, str]] = False, gptqmodel: Optional[bool] = False, + gguf_file: Optional[str] = None, **kwargs, ) -> None: """ @@ -564,9 +573,9 @@ class HFLM(TemplateLM): if not autogptq and not gptqmodel: if model_kwargs.get("load_in_4bit", None): - assert ( - transformers.__version__ >= "4.30.0" - ), "load_in_4bit requires transformers >= 4.30.0" + assert transformers.__version__ >= "4.30.0", ( + "load_in_4bit requires transformers >= 4.30.0" + ) if transformers.__version__ >= "4.30.0": if model_kwargs.get("load_in_4bit", None): if model_kwargs.get("bnb_4bit_compute_dtype", None): @@ -579,6 +588,7 @@ class 
HFLM(TemplateLM): revision=revision, torch_dtype=get_dtype(dtype), trust_remote_code=trust_remote_code, + gguf_file=gguf_file, **model_kwargs, ) else: @@ -676,6 +686,7 @@ class HFLM(TemplateLM): revision: Optional[str] = "main", trust_remote_code: Optional[bool] = False, use_fast_tokenizer: Optional[bool] = True, + gguf_file: Optional[str] = None, ) -> None: """ Helper method during initialization. @@ -683,14 +694,21 @@ class HFLM(TemplateLM): Create a tokenizer object corresponding to the correct tokenizer for value of `pretrained`, or use the pre-initialized tokenizer passed. """ + kwargs = { + "revision": revision, + "trust_remote_code": trust_remote_code, + } + + # gguf format embeds tokenizer and is not compatible with hf tokenizer `use_fast` param + if gguf_file is not None: + kwargs["gguf_file"] = gguf_file + else: + kwargs["use_fast"] = use_fast_tokenizer if tokenizer: if isinstance(tokenizer, str): self.tokenizer = transformers.AutoTokenizer.from_pretrained( - tokenizer, - revision=revision, - trust_remote_code=trust_remote_code, - use_fast=use_fast_tokenizer, + tokenizer, **kwargs ) else: assert isinstance( @@ -705,10 +723,7 @@ class HFLM(TemplateLM): # get the HF hub name via accessor on model model_name = self.model.name_or_path self.tokenizer = transformers.AutoTokenizer.from_pretrained( - model_name, - revision=revision, - trust_remote_code=trust_remote_code, - use_fast=use_fast_tokenizer, + model_name, **kwargs ) return None @@ -818,6 +833,12 @@ class HFLM(TemplateLM): **add_special_tokens, ) if left_truncate_len: + original_lengths = encoding["input_ids"].size(1) + if original_lengths > left_truncate_len: + eval_logger.warn( + f"Left truncation applied. Original sequence length was {original_lengths}, " + f"truncating to last {left_truncate_len} tokens. 
Some content will be lost.", + ) encoding["input_ids"] = encoding["input_ids"][:, -left_truncate_len:] encoding["attention_mask"] = encoding["attention_mask"][ :, -left_truncate_len: @@ -886,16 +907,16 @@ class HFLM(TemplateLM): self, logits: torch.Tensor, contlen: int = None, inplen: int = None ) -> torch.Tensor: if self.backend == "causal": - assert ( - contlen and inplen - ), "Must pass input len and cont. len to select scored logits for causal LM" + assert contlen and inplen, ( + "Must pass input len and cont. len to select scored logits for causal LM" + ) # discard right-padding. # also discard the input/context tokens. we'll only score continuations. logits = logits[inplen - contlen : inplen] elif self.backend == "seq2seq": - assert ( - contlen and not inplen - ), "Selecting scored logits for Seq2SeqLM requires only cont. len" + assert contlen and not inplen, ( + "Selecting scored logits for Seq2SeqLM requires only cont. len" + ) # only discard right-padding. # the logits input to this fn only contain decoder-side tokens. 
logits = logits[:contlen] @@ -905,8 +926,6 @@ class HFLM(TemplateLM): def loglikelihood_rolling( self, requests: List[Instance], disable_tqdm: bool = False ) -> List[float]: - loglikelihoods = [] - adaptive_batch_size = None if self.batch_size == "auto": # using rolling window with maximum context @@ -915,10 +934,17 @@ class HFLM(TemplateLM): print(f"Determined Largest batch size: {batch_size}") adaptive_batch_size = batch_size - for (string,) in tqdm( - [req.args for req in requests], disable=(disable_tqdm or (self.rank != 0)) + # First, collect all windows from all requests + all_windows = [] # List of (request_idx, window) tuples + request_window_counts = [] # Track number of windows per request + + for req_idx, (string,) in enumerate( + tqdm( + [req.args for req in requests], + disable=(disable_tqdm or (self.rank != 0)), + ) ): - rolling_token_windows = list( + rolling_token_windows: List[Tuple[List[int], List[int]]] = list( map( utils.make_disjoint_window, utils.get_rolling_token_windows( @@ -931,37 +957,55 @@ class HFLM(TemplateLM): ) # TODO: Right now, we pass single EOT token to the Encoder and the full context to the decoder, in seq2seq case - rolling_token_windows = [(None,) + x for x in rolling_token_windows] - - pad_amnt = 0 - if self.world_size > 1: - # We pad out the external document-level iterator so the inner iterator doesn't hang - mytensor = torch.tensor(len(rolling_token_windows), device=self.device) - gathered = ( - self.accelerator.gather(mytensor).cpu().detach().numpy().tolist() - ) + windows = [(None,) + x for x in rolling_token_windows] - pad_amnt = max(gathered) - gathered[self.rank] - if pad_amnt > 0: - rolling_token_windows += pad_amnt * [rolling_token_windows[0]] + # Store windows with their request index + all_windows.extend((req_idx, window) for window in windows) + request_window_counts.append(len(windows)) - string_nll = self._loglikelihood_tokens( - requests=rolling_token_windows, - disable_tqdm=True, - 
override_bs=adaptive_batch_size, + # Handle distributed case padding + pad_amnt = 0 + if self.world_size > 1: + mytensor = torch.tensor(len(all_windows), device=self.device) + gathered = self.accelerator.gather(mytensor).cpu().detach().numpy().tolist() + pad_amnt = max(gathered) - gathered[self.rank] + if pad_amnt > 0: + all_windows += pad_amnt * [all_windows[0]] + + all_nlls = [] + batch_size = adaptive_batch_size or self.batch_size + for i in range(0, len(all_windows), batch_size): + batch = all_windows[i : i + batch_size] + # Extract just the windows for processing, keeping track of request indices + batch_indices, batch_windows = zip(*batch) + + batch_nlls = self._loglikelihood_tokens( + requests=batch_windows, + disable_tqdm=False, + override_bs=len(batch_windows), ) + # Store results with their request indices + all_nlls.extend(zip(batch_indices, batch_nlls)) - if (self.world_size > 1) and (pad_amnt > 0): - string_nll = [x[0] for x in string_nll[:-pad_amnt]] - else: - # discard is_greedy - string_nll = [x[0] for x in string_nll] + # Remove padding if necessary + if (self.world_size > 1) and (pad_amnt > 0): + all_nlls = all_nlls[:-pad_amnt] - string_nll = sum(string_nll) - loglikelihoods.append(string_nll) - - # cache this loglikelihood_rolling request - self.cache_hook.add_partial("loglikelihood_rolling", (string,), string_nll) + # Reconstruct per-request loglikelihoods + loglikelihoods = [] + current_idx = 0 + for window_count in request_window_counts: + # Get all nlls for this request + request_nlls = all_nlls[current_idx : current_idx + window_count] + # Sum up the nlls for this request (discarding is_greedy) + request_total = sum(nll[0] for _, nll in request_nlls) + loglikelihoods.append(request_total) + current_idx += window_count + + string = requests[len(loglikelihoods) - 1].args[0] + self.cache_hook.add_partial( + "loglikelihood_rolling", (string,), request_total + ) return loglikelihoods @@ -1073,6 +1117,13 @@ class HFLM(TemplateLM): # when too long 
to fit in context, truncate from the left if self.backend == "causal": + total_length = len(context_enc) + len(continuation_enc) + if total_length > self.max_length + 1: + eval_logger.warn( + f"Combined length of context ({len(context_enc)}) and continuation ({len(continuation_enc)}) " + f"exceeds model's maximum length ({self.max_length}). " + f"Truncating {total_length - self.max_length + 1} tokens from the left." + ) inp = torch.tensor( (context_enc + continuation_enc)[-(self.max_length + 1) :][:-1], dtype=torch.long, @@ -1280,6 +1331,9 @@ class HFLM(TemplateLM): if self.backend == "causal": # max len for inputs = max length, minus room to generate the max new tokens max_ctx_len = self.max_length - max_gen_toks + assert max_ctx_len > 0, ( + f"Invalid configuration: requested max tokens to generate ({max_gen_toks}) must be less than model's maximum sequence length ({self.max_length})." + ) elif self.backend == "seq2seq": # max len for inputs = encoder's whole max_length max_ctx_len = self.max_length @@ -1330,13 +1384,18 @@ class HFLM(TemplateLM): return res - def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str: + def apply_chat_template( + self, chat_history: List[Dict[str, str]], add_generation_prompt: bool = True + ) -> str: """ Method to apply a chat template to a list of chat history between user and model. 
""" try: chat_templated = self.tokenizer.apply_chat_template( - chat_history, tokenize=False, add_generation_prompt=True + chat_history, + tokenize=False, + add_generation_prompt=add_generation_prompt, + continue_final_message=not add_generation_prompt, ) except jinja2.exceptions.TemplateError: eval_logger.warning( @@ -1344,7 +1403,10 @@ class HFLM(TemplateLM): ) chat_history = [msg for msg in chat_history if msg["role"] != "system"] chat_templated = self.tokenizer.apply_chat_template( - chat_history, tokenize=False, add_generation_prompt=True + chat_history, + tokenize=False, + add_generation_prompt=add_generation_prompt, + continue_final_message=not add_generation_prompt, ) return chat_templated diff --git a/lm_eval/models/neuron_optimum.py b/lm_eval/models/neuron_optimum.py index ca2aaf657eeba309e116ef4b99db98686e7c1376..2f3aa929938176e43fe4801ca600a367e7634f7a 100644 --- a/lm_eval/models/neuron_optimum.py +++ b/lm_eval/models/neuron_optimum.py @@ -206,7 +206,7 @@ class NEURON_HF(TemplateLM): "Only float16/bfloat16/float32 are supported." ) - print(f"{'='*20} \n exporting model to neuron") + print(f"{'=' * 20} \n exporting model to neuron") self.model = CustomNeuronModelForCausalLM.from_pretrained( pretrained, revision=revision, @@ -220,19 +220,17 @@ class NEURON_HF(TemplateLM): ) neuron_config = self.model.config.neuron print( - f"SUCCESS: neuron model exported with config {neuron_config}. \n {'='*20}" + f"SUCCESS: neuron model exported with config {neuron_config}. \n {'=' * 20}" ) else: - print( - f"{'='*20} \n loading neuron model with config" f" {neuron_config}..." - ) + print(f"{'=' * 20} \n loading neuron model with config {neuron_config}...") self.model = CustomNeuronModelForCausalLM.from_pretrained( pretrained, revision=revision, trust_remote_code=trust_remote_code, low_cpu_mem_usage=low_cpu_mem_usage, ) - print(f"SUCCESS: neuron model loaded. \n {'='*20}") + print(f"SUCCESS: neuron model loaded. 
\n {'=' * 20}") self.truncation = truncation @@ -353,9 +351,9 @@ class NEURON_HF(TemplateLM): ) def _select_cont_toks(self, logits, contlen=None, inplen=None): - assert ( - contlen and inplen - ), "Must pass input len and cont. len to select scored logits for causal LM" + assert contlen and inplen, ( + "Must pass input len and cont. len to select scored logits for causal LM" + ) # discard right-padding. # also discard the input/context tokens. we'll only score continuations. logits = logits[inplen - contlen : inplen] diff --git a/lm_eval/models/openai_completions.py b/lm_eval/models/openai_completions.py index 46d6373284a5b9e91df082280ca9409654e2107f..1afc0f6ad16bdcbbd13b2a2d4475a52c4d53b156 100644 --- a/lm_eval/models/openai_completions.py +++ b/lm_eval/models/openai_completions.py @@ -1,5 +1,6 @@ import os from functools import cached_property +from operator import itemgetter from typing import Any, Dict, List, Optional, Tuple, Union from lm_eval.api.registry import register_model @@ -68,7 +69,9 @@ class LocalCompletionsAPI(TemplateAPI): if not isinstance(outputs, list): outputs = [outputs] for out in outputs: - for choice, ctxlen in zip(out["choices"], ctxlens): + for choice, ctxlen in zip( + sorted(out["choices"], key=itemgetter("index")), ctxlens + ): assert ctxlen > 0, "Context length must be greater than 0" logprobs = sum(choice["logprobs"]["token_logprobs"][ctxlen:-1]) tokens_logprobs = choice["logprobs"]["token_logprobs"][ctxlen:-1] @@ -87,8 +90,10 @@ class LocalCompletionsAPI(TemplateAPI): if not isinstance(outputs, list): outputs = [outputs] for out in outputs: + tmp = [None] * len(out["choices"]) for choices in out["choices"]: - res.append(choices["text"]) + tmp[choices["index"]] = choices["text"] + res = res + tmp return res @property @@ -129,9 +134,9 @@ class LocalChatCompletion(LocalCompletionsAPI): eos=None, **kwargs, ) -> dict: - assert ( - type(messages) is not str - ), "chat-completions require the --apply_chat_template flag." 
+ assert type(messages) is not str, ( + "chat-completions require the --apply_chat_template flag." + ) gen_kwargs.pop("do_sample", False) if "max_tokens" in gen_kwargs: max_tokens = gen_kwargs.pop("max_tokens") @@ -157,8 +162,10 @@ class LocalChatCompletion(LocalCompletionsAPI): if not isinstance(outputs, list): outputs = [outputs] for out in outputs: + tmp = [None] * len(out["choices"]) for choices in out["choices"]: - res.append(choices["message"]["content"]) + tmp[choices["index"]] = choices["message"]["content"] + res = res + tmp return res def tok_encode( @@ -201,13 +208,12 @@ class OpenAICompletionsAPI(LocalCompletionsAPI): return key def loglikelihood(self, requests, **kwargs): - assert ( - self.model - in [ - "babbage-002", - "davinci-002", - ] - ), f"Prompt loglikelihoods are only supported by OpenAI's API for {['babbage-002', 'davinci-002']}." + assert self.model in [ + "babbage-002", + "davinci-002", + ], ( + f"Prompt loglikelihoods are only supported by OpenAI's API for {['babbage-002', 'davinci-002']}." + ) return super().loglikelihood(requests, **kwargs) def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]: @@ -258,9 +264,9 @@ class OpenAIChatCompletion(LocalChatCompletion): eos="<|endoftext|>", **kwargs, ) -> dict: - assert ( - type(messages) is not str - ), "chat-completions require the --apply_chat_template flag." + assert type(messages) is not str, ( + "chat-completions require the --apply_chat_template flag." 
+ ) gen_kwargs.pop("do_sample", False) if "max_tokens" in gen_kwargs: max_tokens = gen_kwargs.pop("max_tokens") diff --git a/lm_eval/models/optimum_ipex.py b/lm_eval/models/optimum_ipex.py new file mode 100644 index 0000000000000000000000000000000000000000..68d38528330ef0b5667a92d383e5bf8fba3a0dab --- /dev/null +++ b/lm_eval/models/optimum_ipex.py @@ -0,0 +1,79 @@ +from importlib.util import find_spec + +from lm_eval import utils +from lm_eval.api.registry import register_model +from lm_eval.models.huggingface import HFLM +from lm_eval.models.utils import get_dtype + + +eval_logger = utils.eval_logger + + +@register_model("ipex") +class IPEXLM(HFLM): + """ + using the HuggingFace transformers + optimum-intel ipex backend, can run on intel cpu and intel gpu + """ + + def __init__( + self, + **kwargs, + ) -> None: + if "backend" in kwargs: + # currently only supports causal models + assert kwargs["backend"] == "causal", ( + "Currently, only IPEXModelForCausalLM is supported." + ) + + super().__init__( + backend=kwargs.pop("backend", "causal"), + **kwargs, + ) + + def _create_model( + self, + pretrained: str, + revision="main", + dtype="auto", + trust_remote_code=False, + # arguments used for splitting a model across GPUs naively. + # only used if `parallelize=True`. + # (accelerate naive PP (device_map) options) + parallelize=False, + gpus=None, + max_memory_per_gpu=None, + max_cpu_memory=None, + offload_folder="./offload", + # PEFT, delta weights and quantization options + peft=None, + delta=None, + autogptq=False, + gptqmodel=False, + **kwargs, + ) -> None: + if not find_spec("optimum"): + raise ModuleNotFoundError( + "package `optimum` is not installed. 
Please install it via `pip install optimum[ipex]`" + ) + else: + from optimum.intel import IPEXModelForCausalLM + + model_kwargs = kwargs if kwargs else {} + model_kwargs.update( + self._get_accelerate_args( + parallelize=parallelize, + device_map=kwargs.get("device_map", None), + max_memory_per_gpu=max_memory_per_gpu, + max_cpu_memory=max_cpu_memory, + offload_folder=offload_folder, + gpus=gpus, + ) + ) + + self._model = IPEXModelForCausalLM.from_pretrained( + pretrained, + revision=revision, + torch_dtype=get_dtype(dtype), + trust_remote_code=trust_remote_code, + **model_kwargs, + ) diff --git a/lm_eval/models/optimum_lm.py b/lm_eval/models/optimum_lm.py index b13b321f25e1d643ad8692a0a5ed33c39d652b8c..de5e2460b7fe146d019a5bef787a50ba4e9c6be6 100644 --- a/lm_eval/models/optimum_lm.py +++ b/lm_eval/models/optimum_lm.py @@ -29,9 +29,9 @@ class OptimumLM(HFLM): ) -> None: if "backend" in kwargs: # optimum currently only supports causal models - assert ( - kwargs["backend"] == "causal" - ), "Currently, only OVModelForCausalLM is supported." + assert kwargs["backend"] == "causal", ( + "Currently, only OVModelForCausalLM is supported." + ) self.openvino_device = device diff --git a/lm_eval/models/utils.py b/lm_eval/models/utils.py index e7c28c3e6ac5ae5691b1450aaa7820d55aadc394..8d672c129612205e38e4428ab861a80a98a49ac7 100644 --- a/lm_eval/models/utils.py +++ b/lm_eval/models/utils.py @@ -155,9 +155,9 @@ def pad_and_concat( length in the batch. Used for batching inputs and continuations in seq2seq models. 
""" - assert ( - padding_side == "left" or padding_side == "right" - ), f"Unrecognized padding type: '{padding_side}' not 'left' or 'right'" + assert padding_side == "left" or padding_side == "right", ( + f"Unrecognized padding type: '{padding_side}' not 'left' or 'right'" + ) for i, tensor in enumerate(tensors): if len(tensor.shape) == 2: diff --git a/lm_eval/models/vllm_causallms.py b/lm_eval/models/vllm_causallms.py index 7afc2cb83181e8ac6255c6587fcafb154d635abd..5718cb5da0cd44b7b58de19c73a0d670d88d06ae 100644 --- a/lm_eval/models/vllm_causallms.py +++ b/lm_eval/models/vllm_causallms.py @@ -76,9 +76,9 @@ class VLLM(TemplateLM): ) assert "cuda" in device or device is None, "vLLM only supports CUDA" - assert ( - max_length is None or max_model_len is None - ), "Either max_length or max_model_len may be provided, but not both" + assert max_length is None or max_model_len is None, ( + "Either max_length or max_model_len may be provided, but not both" + ) self._max_length = max_model_len if max_model_len is not None else max_length self.tensor_parallel_size = int(tensor_parallel_size) @@ -102,7 +102,7 @@ class VLLM(TemplateLM): self.batch_size = ( "auto" if isinstance(batch_size, str) and "auto" in batch_size - else batch_size + else int(batch_size) ) if self.data_parallel_size <= 1: self.model = LLM(**self.model_args) @@ -142,9 +142,9 @@ class VLLM(TemplateLM): self._max_gen_toks = max_gen_toks if lora_local_path is not None: - assert parse_version(version("vllm")) > parse_version( - "0.3.0" - ), "lora adapters only compatible with vllm > v0.3.0." + assert parse_version(version("vllm")) > parse_version("0.3.0"), ( + "lora adapters only compatible with vllm > v0.3.0." 
+ ) self.lora_request = LoRARequest("finetuned", 1, lora_local_path) else: self.lora_request = None @@ -184,14 +184,21 @@ class VLLM(TemplateLM): def max_gen_toks(self): return self._max_gen_toks - def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str: + def apply_chat_template( + self, chat_history: List[Dict[str, str]], add_generation_prompt: bool = True + ) -> str: """ Method to apply a chat template to a list of chat history between user and model. """ - return self.tokenizer.apply_chat_template( - chat_history, tokenize=False, add_generation_prompt=True + chat_templated = self.tokenizer.apply_chat_template( + chat_history, + tokenize=False, + add_generation_prompt=add_generation_prompt, + continue_final_message=not add_generation_prompt, ) + return chat_templated + @property def tokenizer_name(self) -> str: return self.tokenizer.name_or_path.replace("/", "__") @@ -281,10 +288,21 @@ class VLLM(TemplateLM): def loglikelihood_rolling( self, requests: List[Instance], disable_tqdm: bool = False ) -> List[float]: - loglikelihoods = [] - - for (string,) in tqdm([req.args for req in requests], disable=disable_tqdm): - rolling_token_windows = list( + adaptive_batch_size = None + if self.batch_size == "auto": + adaptive_batch_size = len(requests) + + # First, collect all windows from all requests + all_windows = [] # List of (request_idx, window) tuples + request_window_counts = [] # Track number of windows per request + + for req_idx, (string,) in enumerate( + tqdm( + [req.args for req in requests], + disable=(disable_tqdm or (self.rank != 0)), + ) + ): + rolling_token_windows: List[Tuple[List[int], List[int]]] = list( map( make_disjoint_window, get_rolling_token_windows( @@ -297,20 +315,42 @@ class VLLM(TemplateLM): ) ) - rolling_token_windows = [(None,) + x for x in rolling_token_windows] + # TODO: Right now, we pass single EOT token to the Encoder and the full context to the decoder, in seq2seq case + windows = [(None,) + x for x in 
rolling_token_windows] - string_nll = self._loglikelihood_tokens( - rolling_token_windows, - ) + # Store windows with their request index + all_windows.extend((req_idx, window) for window in windows) + request_window_counts.append(len(windows)) - # discard is_greedy - string_nll = [x[0] for x in string_nll] + all_nlls = [] + batch_size = adaptive_batch_size or int(self.batch_size) + for i in range(0, len(all_windows), batch_size): + batch = all_windows[i : i + batch_size] + # Extract just the windows for processing, keeping track of request indices + batch_indices, batch_windows = zip(*batch) - string_nll = sum(string_nll) - loglikelihoods.append(string_nll) + batch_nlls = self._loglikelihood_tokens( + requests=batch_windows, + disable_tqdm=False, + ) + # Store results with their request indices + all_nlls.extend(zip(batch_indices, batch_nlls)) - # cache this loglikelihood_rolling request - self.cache_hook.add_partial("loglikelihood_rolling", (string,), string_nll) + # Reconstruct per-request loglikelihoods + loglikelihoods = [] + current_idx = 0 + for window_count in request_window_counts: + # Get all nlls for this request + request_nlls = all_nlls[current_idx : current_idx + window_count] + # Sum up the nlls for this request (discarding is_greedy) + request_total = sum(nll[0] for _, nll in request_nlls) + loglikelihoods.append(request_total) + current_idx += window_count + + string = requests[len(loglikelihoods) - 1].args[0] + self.cache_hook.add_partial( + "loglikelihood_rolling", (string,), request_total + ) return loglikelihoods diff --git a/lm_eval/models/vllm_vlms.py b/lm_eval/models/vllm_vlms.py index b434ba059fa090506adbd11082af6608316648f2..ab216ab59d6bb7a2f4745c365e312bb059712094 100644 --- a/lm_eval/models/vllm_vlms.py +++ b/lm_eval/models/vllm_vlms.py @@ -144,7 +144,9 @@ class VLLM_VLM(VLLM): ) return outputs - def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str: + def apply_chat_template( + self, chat_history: List[Dict[str, 
str]], add_generation_prompt=True + ) -> str: self.chat_applied = True if not self.interleave: for content in chat_history: @@ -194,7 +196,9 @@ class VLLM_VLM(VLLM): ) return self.processor.apply_chat_template( - chat_history, add_generation_prompt=True + chat_history, + add_generation_prompt=add_generation_prompt, + continue_final_message=not add_generation_prompt, ) def generate_until( diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 62e65a1eb8fe0959908692431f314e345d2c55f8..c92043bcb32ff7a595547d39693fdebe5cddbb37 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -5,130 +5,136 @@ For more information, including a full list of task names and their precise meanings or sources, follow the links provided to the individual README.md files for each subfolder. -| Task Family | Description | Language(s) | -|-------------|-------------|-------------| -| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | -| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | -| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | -| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. | English | -| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. 
| Arabic (Some MT) | -| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | -| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. | Arabic | -| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | -| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | -| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | -| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | -| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | -| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | -| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. | English, German | -| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | -| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | -| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. | English, Basque, Basque (MT) | -| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. 
| Multiple | -| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | -| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | -| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. | Chinese | -| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | -| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | -| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | -| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | -| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | -| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | -| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | -| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. | English | -| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | -| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | -| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | -| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. 
| Basque | -| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | -| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | -| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | -| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French| -| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | -| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | -| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | -| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | -| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | -| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. | Spanish, English | -| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | -| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | -| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. | English | -| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | -| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. 
| English | -| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | -| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | -| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | -| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | -| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | -| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | -| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | -| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | -| [lambada_multilingual_stablelm](lambada_multilingual_stablelm/README.md) | Multilingual LAMBADA dataset. Users should prefer evaluating on this version of the multilingual dataset instead of on `lambada_multilingual`. | German, English, Spanish, French, Italian, Dutch, Portuguese | -| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time | English | -| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization | English, Multilingual | -| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. 
| English, Chinese | -| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | -| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | -| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | -| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept. | English | -| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | -| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. | English | -| medqa | Multiple choice question answering based on the United States Medical License Exams. | | -| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. | Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | -| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | -| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | -| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | -| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | -| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | -| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. 
| English | -| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | -| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | -| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | -| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (34 languages) **Machine Translated.** | -| [okapi/truthfulqa_multilingual](okapi/truthfulqa_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | -| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | -| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | -| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | -| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | -| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. | English | -| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | -| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. 
| Polish | -| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | -| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | -| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | -| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | -| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | -| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. | English | -| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | -| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | -| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH) | English | -| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | -| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | -| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | -| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. | English | -| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | -| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. 
| English | -| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | -| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | -| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | -| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | -| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | -| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. | English | -| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | -| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | -| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | -| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | -| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. | English | -| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | -| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. 
| English | -| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | -| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | -| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | -| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | -| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | -| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | +| Task Family | Description | Language(s) | +|-------------|-------------|-------------------------------------------------------------------------------------------------------------------------------| +| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | +| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | +| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | +| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. 
| English | +| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. | Arabic | +| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | +| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | +| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | +| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | +| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | +| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | +| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | +| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. 
| English, German | +| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | +| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | +| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. | English, Basque, Basque (MT) | +| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | +| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | +| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | +| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. | Chinese | +| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | +| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | +| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | +| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | +| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | +| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | +| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | +| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. 
| English | +| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | +| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | +| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | +| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | +| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | +| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | +| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | +| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French | +| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | +| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | +| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | +| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | +| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | +| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | +| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. 
| Spanish, English | +| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | +| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | +| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. | English | +| [hrm8k](hrm8k/README.md) | A challenging bilingual math reasoning benchmark for Korean and English. | Korean (Some MT), English (Some MT) | +| [humaneval](humaneval/README.md) | Code generation task that measures functional correctness for synthesizing programs from docstrings. | Python | +| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | +| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | +| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | +| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | +| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | +| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | +| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | +| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | +| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | +| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset.
This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | +| [lambada_multilingual_stablelm](lambada_multilingual_stablelm/README.md) | Multilingual LAMBADA dataset. Users should prefer evaluating on this version of the multilingual dataset instead of on `lambada_multilingual`. | German, English, Spanish, French, Italian, Dutch, Portuguese | +| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). These tasks are static and will not change over time. | English | +| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization. | English, Multilingual | +| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | +| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | +| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | +| [mbpp](mbpp/README.md) | A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions. | Python | +| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | +| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts. | English | +| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | +| medmcqa | Medical multiple choice questions assessing detailed medical knowledge.
| English | +| medqa | Multiple choice question answering based on the United States Medical License Exams. | | +| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. | Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | +| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | +| [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | +| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | +| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | +| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | +| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | +| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | +| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | +| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | +| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | +| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. 
| Multiple (34 languages) **Machine Translated.** | +| [okapi/truthfulqa_multilingual](okapi/truthfulqa_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | +| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | +| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | +| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | +| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | +| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. | English | +| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | +| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | +| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | +| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | +| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | +| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | +| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. 
| English | +| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. | English | +| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | +| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | +| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval, and MATH). | English | +| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | +| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | +| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | +| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. | English | +| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | +| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | +| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | +| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | +| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | +| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | +| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments.
| Traditional Chinese | +| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. | English | +| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | +| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | +| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | +| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | +| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. | English | +| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | +| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | +| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | +| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | +| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | +| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. 
| English, Czech, German, Finnish, Russian, Romanian, Turkish | +| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | +| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | | [xnli](xnli/README.md) | Cross-Lingual Natural Language Inference to test understanding across different languages. | Arabic, Bulgarian, German, Greek, English, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese, Chinese | -| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | -| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | -| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. | Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | -| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | +| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | +| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | +| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. 
| Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | +| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml index 58cf795aef2ef87d5e5421d549b2de9ec61617f0..08ed9bb0c8bc32597554c6908cdb44002aa291be 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml @@ -9,4 +9,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml index 6f61004a9c45c15606d4ef2385270e6525e4fe1c..b52bc80470ebdd0348af002e33efc77e516252dd 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml index 90e57ae09bcdae6a581a88faaa2530d85df8ce28..d9f62abc8d1684158d80bbfcbbd2e47d1cf144f3 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml index 3e989b8c6c884b9dfea9cde8fbd8ff0351e7adf2..d96dc0bd32b05061995d1f672342b9320ea0ef9d 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff 
--git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml index 1ece047b356aaac727da458ee9013fdab2b56c4a..b40e7c808981291c369b7364cc52a2b153c51e43 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml index a464a62a9825edf5e9158df85f062b9093ad25e6..5065d0bde9f54ce93f2f189a9c7e3484f3e1da50 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml b/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml index eac235773278bdf117108f631fd6780aab1c5349..471c0fc0b44ead783ea6b80d7029b13592703ff9 100644 --- a/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml +++ b/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml @@ -1,4 +1,4 @@ -dataset_path: yazeed7/ArabicMMLU +dataset_path: MBZUAI/ArabicMMLU test_split: test fewshot_split: dev fewshot_config: @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 0.0 + version: 1.0 diff --git a/lm_eval/tasks/arabicmmlu/_generate_configs.py b/lm_eval/tasks/arabicmmlu/_generate_configs.py index 4d091e126c52d5cdc1f7f16b13f74fad6e4667d3..5dc627e598dd794c07199a9bf83a98e6ca36377a 100644 --- a/lm_eval/tasks/arabicmmlu/_generate_configs.py +++ b/lm_eval/tasks/arabicmmlu/_generate_configs.py @@ -14,46 +14,46 @@ eval_logger = logging.getLogger("lm-eval") SUBJECTS = { - "Driving Test": "other", - "High Geography": "social_science", - "High History": "humanities", "Islamic Studies": "humanities", - 
"Univ Accounting": "social_science", - "Primary General Knowledge": "other", - "Univ Political Science": "social_science", - "Primary Math": "stem", - "Middle General Knowledge": "other", - "High Biology": "stem", - "Primary Natural Science": "stem", - "High Economics": "social_science", - "Middle Natural Science": "stem", - "Middle Geography": "social_science", - "Primary Social Science": "social_science", - "Middle Computer Science": "stem", - "Middle Islamic Studies": "humanities", - "Primary Computer Science": "stem", - "High Physics": "stem", - "Middle Social Science": "social_science", - "Middle Civics": "social_science", - "High Computer Science": "stem", + "Driving Test": "other", + "Natural Science (Middle School)": "stem", + "Natural Science (Primary School)": "stem", + "History (Primary School)": "humanities", + "History (Middle School)": "humanities", + "History (High School)": "humanities", "General Knowledge": "other", - "High Civics": "social_science", - "Prof Law": "humanities", - "High Islamic Studies": "humanities", - "Primary Arabic Language": "language", - "High Arabic Language": "language", - "Arabic Language (Grammar)": "language", - "Primary History": "humanities", - "Middle History": "humanities", - "Univ Economics": "social_science", + "General Knowledge (Primary School)": "other", + "General Knowledge (Middle School)": "other", + "Law (Professional)": "humanities", + "Physics (High School)": "stem", + "Social Science (Middle School)": "social_science", + "Social Science (Primary School)": "social_science", + "Management (University)": "other", + "Arabic Language (Primary School)": "language", + "Arabic Language (Middle School)": "language", + "Arabic Language (High School)": "language", + "Political Science (University)": "social_science", + "Philosophy (High School)": "humanities", + "Accounting (University)": "social_science", + "Computer Science (University)": "stem", + "Computer Science (Middle School)": "stem", + "Computer Science 
(Primary School)": "stem", + "Computer Science (High School)": "stem", + "Geography (Primary School)": "social_science", + "Geography (Middle School)": "social_science", + "Geography (High School)": "social_science", + "Math (Primary School)": "stem", + "Biology (High School)": "stem", + "Economics (University)": "social_science", + "Economics (Middle School)": "social_science", + "Economics (High School)": "social_science", "Arabic Language (General)": "language", - "Univ Computer Science": "stem", - "Primary Islamic Studies": "humanities", - "Primary Geography": "social_science", - "High Philosophy": "humanities", - "Middle Arabic Language": "language", - "Middle Economics": "social_science", - "Univ Management": "other", + "Arabic Language (Grammar)": "language", + "Islamic Studies (High School)": "humanities", + "Islamic Studies (Middle School)": "humanities", + "Islamic Studies (Primary School)": "humanities", + "Civics (Middle School)": "social_science", + "Civics (High School)": "social_science", } @@ -69,8 +69,9 @@ if __name__ == "__main__": # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs. 
base_yaml_name = os.path.split(args.base_yaml_path)[-1] - with open(args.base_yaml_path, encoding="utf-8") as f: - base_yaml = yaml.full_load(f) + + # with open(args.base_yaml_path, encoding="utf-8") as f: + # base_yaml = yaml.full_load(f) ALL_CATEGORIES = [] for subject, category in tqdm(SUBJECTS.items()): @@ -81,8 +82,8 @@ if __name__ == "__main__": yaml_dict = { "include": base_yaml_name, - "tag": f"arabicmmlu_{category}", - "task": f"arabicmmlu_{subject.lower().replace(' ', '_')}", + "tag": f"arabicmmlu_{category}_tasks", + "task": f"arabicmmlu_{subject.lower().replace(' ', '_').replace('(', '').replace(')', '')}", "task_alias": subject, "dataset_name": subject, # "description": description, diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ec8caad6e0317a081719b568844d2cd8d9e34ea --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Accounting (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_accounting_university" +"task_alias": "Accounting (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml index f57dc08c22086023626c4181dc78cd38c1de1900..621312d98b529d9f26cf70bbfb1356d656d21472 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml @@ -1,5 +1,5 @@ "dataset_name": "Arabic Language (General)" -"tag": "arabicmmlu_language_tasks" "include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_arabic_language_(general)" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_general" "task_alias": "Arabic Language (General)" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml index baf32676283101b319325e8a5e19ebae78e2d115..0511b9d91de2bc92705fee06a42d7bf68f14b231 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml @@ -1,5 +1,5 @@ "dataset_name": "Arabic Language (Grammar)" -"tag": "arabicmmlu_language_tasks" "include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_arabic_language_(grammar)" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_grammar" "task_alias": "Arabic Language (Grammar)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77dc002bb744c90632bf3e4bad0bdfc5659fe375 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_high_school" +"task_alias": "Arabic Language (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b9b2007495e04ae445895efb8484b0da799d45d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_middle_school" +"task_alias": "Arabic Language (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c0f045d8825b6a6e778211f0aead037b628b907 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_primary_school" +"task_alias": "Arabic Language (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..865a477dee67f0ba7a038151844702906a23ea5e --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Biology (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_biology_high_school" +"task_alias": "Biology (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f81e9220f3def1a6198a1feaf07f78145ab2520 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Civics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_civics_high_school" +"task_alias": "Civics (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e82c777caa01b761af706bffcf834f13cb02b87 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Civics 
(Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_civics_middle_school" +"task_alias": "Civics (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59aa929d3ceb33d78d849a0e9a8f7e3873b8ab5a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_high_school" +"task_alias": "Computer Science (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ecdc10616fa01a32f0ce9924f6140effc7271ef --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_middle_school" +"task_alias": "Computer Science (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8feec4aaadf6bc87576f70d042aee1e2cf7461f5 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_primary_school" +"task_alias": "Computer 
Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml new file mode 100644 index 0000000000000000000000000000000000000000..327cfab645fdd32688f3c337f1e8877a9d94e0ad --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_university" +"task_alias": "Computer Science (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml index d40c9eb9d69b50b6fb6196733c40789e52dd2621..ab951dfc878fa8f98a7ada1e13d3ed9e7d856494 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml @@ -1,5 +1,5 @@ "dataset_name": "Driving Test" -"tag": "arabicmmlu_other_tasks" "include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" "task": "arabicmmlu_driving_test" "task_alias": "Driving Test" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78cba021270843b6056ba1845a90556f98ade506 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_high_school" +"task_alias": "Economics (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ed004b34a774212a6fb3f69ff05fa809d87f3fa7 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_middle_school" +"task_alias": "Economics (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76bfe4f1c53af0e17fef375e6676db490f68c82e --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_university" +"task_alias": "Economics (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml index fbd8839dba0a3b4fb552a5e9aae82da8f32d63cb..8ac6e71066262860a28a47ec00b887b2b8d7329d 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml @@ -1,5 +1,5 @@ "dataset_name": "General Knowledge" -"tag": "arabicmmlu_other_tasks" "include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" "task": "arabicmmlu_general_knowledge" "task_alias": "General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6e4b7c97c93a885f3ff9c40392517af8e4bad3d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "General Knowledge (Middle School)" 
+"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_general_knowledge_middle_school" +"task_alias": "General Knowledge (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0735829975cde6ad6ddcd3bf7a12ae81f1f8852e --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "General Knowledge (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_general_knowledge_primary_school" +"task_alias": "General Knowledge (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6264fc45801cb0af139e08bd742693058986020 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_high_school" +"task_alias": "Geography (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6483749f897a1d353c19020186e5f43dbd5ba8ab --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_middle_school" +"task_alias": "Geography (Middle School)" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1465fb05a5e39106479b9532f99fd0b0d7349fe4 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_primary_school" +"task_alias": "Geography (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml deleted file mode 100644 index 17d17bc8b0421424e1fcb4332f83ad5a3bc1a8ae..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_arabic_language" -"task_alias": "High Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml deleted file mode 100644 index 2b5baf0bb958b2f8dbad17d7d280e59e82ef23ab..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Biology" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_biology" -"task_alias": "High Biology" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml deleted file mode 100644 index 870509229c7d0035ba89a814e67fc36798faaab9..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Civics" -"tag": 
"arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_civics" -"task_alias": "High Civics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml deleted file mode 100644 index f1a66a5ce87bb987b5fd389781835bee6fdd2079..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_computer_science" -"task_alias": "High Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml deleted file mode 100644 index a1d6e90f537b855375606a3674b2e6cc32f6ef69..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_economics" -"task_alias": "High Economics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml deleted file mode 100644 index ad9804328b836253fd441536bec9aa4c9b21b8b0..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_geography" -"task_alias": "High Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml deleted file mode 100644 index 49c82669df895fcce907a7d9ad151b40983b7a90..0000000000000000000000000000000000000000 --- 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_history" -"task_alias": "High History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml deleted file mode 100644 index 15b5358bd0a2d21917c4500bdeac6cabacdbd5d1..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_islamic_studies" -"task_alias": "High Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml deleted file mode 100644 index e0b20e306dc269064eaa248849a80faef1a920ae..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Philosophy" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_philosophy" -"task_alias": "High Philosophy" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml deleted file mode 100644 index a7fe5eccdba8eb490f6d8aa0aca6f7b56d74ba49..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Physics" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_physics" -"task_alias": "High Physics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..b97a081a71bce1ddc84e2207d53993f182a8270b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "History (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_high_school" +"task_alias": "History (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3435604a4159ffb2fa425813997bf38343a3f27b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "History (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_middle_school" +"task_alias": "History (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c156ff521a7af619adc9ae3ba30f08358e6751d0 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "History (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_primary_school" +"task_alias": "History (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml index bacd5ace3feea344305c6579c3fdf98c55df04ae..4d5020a5ab95397d71b30b8b9a00df137c01280b 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml @@ -1,5 +1,5 @@ "dataset_name": "Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" "include": 
"_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" "task": "arabicmmlu_islamic_studies" "task_alias": "Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bae042f3143a82c29c131320049a921a9bb98a4 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_high_school" +"task_alias": "Islamic Studies (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af192fc1f6bf64866232a68cbf70d18013e16923 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_middle_school" +"task_alias": "Islamic Studies (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4e5d3543dda46309d310b5eef5edebde575bb22 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_primary_school" +"task_alias": "Islamic Studies (Primary School)" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e2b6a4a42c720bfadfa9a505265b88ffbcc9660 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Law (Professional)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_law_professional" +"task_alias": "Law (Professional)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml new file mode 100644 index 0000000000000000000000000000000000000000..386c8e6b7623a5e51c0a557fb4f8958a7604ddf4 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Management (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_management_university" +"task_alias": "Management (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1df99b8a0f46d57147bfcb7c8ae16a7b6bca1f9b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Math (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_math_primary_school" +"task_alias": "Math (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml deleted file mode 100644 index 14a2ab1a5286857cf3747bc00a01839eeb348682..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ 
-"dataset_name": "Middle Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_arabic_language" -"task_alias": "Middle Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml deleted file mode 100644 index 44ba95d458ccd08c720d30cf658bca719850f942..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Civics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_civics" -"task_alias": "Middle Civics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml deleted file mode 100644 index 8dd4136fb03ef49576e898a8576f2c33b37be3e8..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_computer_science" -"task_alias": "Middle Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml deleted file mode 100644 index 312fa2e332f0eb8b5ff0801f27b9964d370862fc..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_economics" -"task_alias": "Middle Economics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml deleted file 
mode 100644 index c359d85ac77792fb7373213ca32b5c7893e53c7c..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle General Knowledge" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_general_knowledge" -"task_alias": "Middle General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml deleted file mode 100644 index 111b13cfe059afde2727bdc16ff3a2493304fa15..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_geography" -"task_alias": "Middle Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml deleted file mode 100644 index 615a2e51f89b2b6e4489bbaf6420b27f7cf3c7c4..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_history" -"task_alias": "Middle History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml deleted file mode 100644 index 449223600e01bdef3fb97641efc421bbeb49b9dc..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": 
"arabicmmlu_middle_islamic_studies" -"task_alias": "Middle Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml deleted file mode 100644 index 265cdbaa03b9ebf6015913b17579087d75f28365..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Natural Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_natural_science" -"task_alias": "Middle Natural Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml deleted file mode 100644 index 84c247dd17e04a595c3c783171e452b6e7f4667f..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Social Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_social_science" -"task_alias": "Middle Social Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b61531d16a3a3c7ad34561c274762ec77726bbf --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Natural Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_natural_science_middle_school" +"task_alias": "Natural Science (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1efd6c9bdf9c63d48c298639090e187588c68c7d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Natural Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_natural_science_primary_school" +"task_alias": "Natural Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66715bb054d1e907694ef86c8077ef9de16938c9 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Philosophy (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_philosophy_high_school" +"task_alias": "Philosophy (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00ecf8ad181aed62a1e987d0abfc6d5c21ea15de --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Physics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_physics_high_school" +"task_alias": "Physics (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f64125fefbff9e2a1196dc2d281fc7496115ceb --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Political Science (University)" +"include": 
"_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_political_science_university" +"task_alias": "Political Science (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml deleted file mode 100644 index 700bc0781b5741661bbecc7c755f1be16647ec52..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_arabic_language" -"task_alias": "Primary Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml deleted file mode 100644 index b89089cd2f6eb278ac452b0dcc95bde643bcef20..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_computer_science" -"task_alias": "Primary Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml deleted file mode 100644 index 85dd0b7fa7ce7db9e97c8928fef0dfab47fe4c24..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary General Knowledge" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_general_knowledge" -"task_alias": "Primary General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml deleted file mode 100644 index f7efc48783c965c1eb1430834d9fbb08c25244ed..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_geography" -"task_alias": "Primary Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml deleted file mode 100644 index f7d69ca975eb8a735cf50b4ceaeef53b292a6674..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_history" -"task_alias": "Primary History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml deleted file mode 100644 index b36cd640376b0079dbb1cb609106de7266e2cd76..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_islamic_studies" -"task_alias": "Primary Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml deleted file mode 100644 index 0e53adcfa7fcccfafc69e1f763dafec8dd4425db..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Math" -"tag": "arabicmmlu_stem_tasks" -"include": 
"_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_math" -"task_alias": "Primary Math" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml deleted file mode 100644 index 4e208c76faca4f773d88738d0bec0e6ca55895a8..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Natural Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_natural_science" -"task_alias": "Primary Natural Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml deleted file mode 100644 index fee4fe5d0b182c24016b5080f0d605f8f22ef2ee..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Social Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_social_science" -"task_alias": "Primary Social Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml deleted file mode 100644 index 20bf6c5f49ba05df166d589e761d2e3ac4ef43e3..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Prof Law" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_prof_law" -"task_alias": "Prof Law" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b876649f9e8b0ade91766baa95a318aaf727ef5d --- 
/dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Social Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_social_science_middle_school" +"task_alias": "Social Science (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f68848085e1e8428bbeab6cee05ac9a006c973e --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Social Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_social_science_primary_school" +"task_alias": "Social Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml deleted file mode 100644 index 6d1d94125edcb6ffb7ee13853ce3b8d0014dbd18..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Accounting" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_accounting" -"task_alias": "Univ Accounting" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml deleted file mode 100644 index 42e7e89a965a006f7e8a6ae171057662d3ac93ba..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": 
"arabicmmlu_univ_computer_science" -"task_alias": "Univ Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml deleted file mode 100644 index 21015ffad848c98398a0b4d54e71ea8e4dd58ba4..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_economics" -"task_alias": "Univ Economics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml deleted file mode 100644 index e69ad74b3da4b7bbf5c8891b1340daecb74e6654..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Management" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_management" -"task_alias": "Univ Management" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml deleted file mode 100644 index bb85a104e1c6ffac215772f0266acaee894c0a37..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Political Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_political_science" -"task_alias": "Univ Political Science" diff --git a/lm_eval/tasks/arabicmmlu/utils.py b/lm_eval/tasks/arabicmmlu/utils.py index e1ed4b99a9fe4b6b39d1d08ca3afd482f7e983ef..a572489e118564601243e6a6bf813b77cbe95220 100644 --- a/lm_eval/tasks/arabicmmlu/utils.py +++ b/lm_eval/tasks/arabicmmlu/utils.py @@ -23,7 +23,7 @@ def doc_to_text(doc): 
question = ( doc["Question"] - if doc["Context"] == "" + if not doc["Context"] else f"{doc['Context']}\n\n{doc['Question']}" ) @@ -41,4 +41,4 @@ def doc_to_text(doc): def doc_to_choice(doc): - return [alpa[i][0] for i in range(5) if doc[f"Option {i+1}"]] + return [alpa[i][0] for i in range(5) if doc[f"Option {i + 1}"]] diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77cbf95ace833b0c513034e240513bae3259caa4 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU.yaml @@ -0,0 +1,12 @@ +group: AraDiCE_ArabicMMLU_egy +task: +- AraDiCE_ArabicMMLU_humanities_egy +- AraDiCE_ArabicMMLU_language_egy +- AraDiCE_ArabicMMLU_social-science_egy +- AraDiCE_ArabicMMLU_stem_egy +- AraDiCE_ArabicMMLU_other_egy +aggregate_metric_list: + - metric: acc + weight_by_size: True + - metric: acc_norm + weight_by_size: True diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_history.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a03177d137ae08ff327788992100fb62588f139 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_history.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_humanities_history" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_high_humanities_history_egy" +"task_alias": "high humanities history" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_islamic-studies.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_islamic-studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ee65adc6dbf36ef7632ec9638ee990f8a73360d8 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_islamic-studies.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_humanities_islamic-studies" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_high_humanities_islamic-studies_egy" +"task_alias": "high humanities islamic-studies" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_philosophy.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..123f696f30977f71872c926325fc924e12f0dccc --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_humanities_philosophy.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_humanities_philosophy" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_high_humanities_philosophy_egy" +"task_alias": "high humanities philosophy" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_language_arabic-language.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_language_arabic-language.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1df05181daeebb89e12dc8ca66d24becb950ab72 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_language_arabic-language.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_language_arabic-language" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": 
"AraDiCE_ArabicMMLU_language_egy" +"task": "AraDiCE_ArabicMMLU_high_language_arabic-language_egy" +"task_alias": "high language arabic-language" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_civics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_civics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b42490b066b7919d83c0cbad44398dec147fac5 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_civics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_social-science_civics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_high_social-science_civics_egy" +"task_alias": "high social-science civics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_economics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_economics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5518b2cda31c2f2482e56fffc4cfa7ca44bc1bb5 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_economics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_social-science_economics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_high_social-science_economics_egy" +"task_alias": "high social-science economics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_geography.yaml 
b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9a2d5b332976d20a362baf53c8633ec1132e62f --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_social-science_geography.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_social-science_geography" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_high_social-science_geography_egy" +"task_alias": "high social-science geography" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_biology.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f1ab8a7b8768712e46cb9950113c014af205dc8 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_biology.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_stem_biology" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_high_stem_biology_egy" +"task_alias": "high stem biology" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_computer-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_computer-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c27f5be3185b1140ba07e56c6335c275fa9e0b1e --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_computer-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_stem_computer-science" +"description": "" +"fewshot_split": !!null "null" +"include": 
"_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_high_stem_computer-science_egy" +"task_alias": "high stem computer-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_physics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e24a2f4fbbd0a7ae25abc501bae68b3909b7259 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_high_stem_physics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_stem_physics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_high_stem_physics_egy" +"task_alias": "high stem physics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_humanities_history.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_humanities_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f2c3770406823a48930876f3d761a7e0bfe8e28 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_humanities_history.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_humanities_history" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_middle_humanities_history_egy" +"task_alias": "middle humanities history" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_humanities_islamic-studies.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_humanities_islamic-studies.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..41995c4aa3122b88018270586d0622b4e4c839c1 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_humanities_islamic-studies.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_humanities_islamic-studies" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_middle_humanities_islamic-studies_egy" +"task_alias": "middle humanities islamic-studies" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_language_arabic-language.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_language_arabic-language.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e33bf590a19b7a8c42f1b52c529b5c7df4dec731 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_language_arabic-language.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_language_arabic-language" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_egy" +"task": "AraDiCE_ArabicMMLU_middle_language_arabic-language_egy" +"task_alias": "middle language arabic-language" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_other_general-knowledge.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_other_general-knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73fc902702363d8f1793f59814357b6356ba2d61 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_other_general-knowledge.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_other_general-knowledge" +"description": "" +"fewshot_split": !!null "null" 
+"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_egy" +"task": "AraDiCE_ArabicMMLU_middle_other_general-knowledge_egy" +"task_alias": "middle other general-knowledge" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_civics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_civics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8407f36e7f356f75d1df8f7ce51f65734a7700bb --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_civics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_social-science_civics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_middle_social-science_civics_egy" +"task_alias": "middle social-science civics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_economics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_economics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbcb040d27ea95bded3a8043d984ad67ebe9eb19 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_economics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_social-science_economics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_middle_social-science_economics_egy" +"task_alias": "middle social-science economics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git 
a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_geography.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57fe94f29453346c8bb30077016dc24574fbd4cc --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_geography.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_social-science_geography" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_middle_social-science_geography_egy" +"task_alias": "middle social-science geography" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_social-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_social-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..115170b8cc57e5365b0e4a57660c2bf70b3b3de9 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_social-science_social-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_social-science_social-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_middle_social-science_social-science_egy" +"task_alias": "middle social-science social-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_stem_computer-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_stem_computer-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d8787e3c065b0d6ac941624a5e8273b3f195fbf --- 
/dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_stem_computer-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_stem_computer-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_middle_stem_computer-science_egy" +"task_alias": "middle stem computer-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_stem_natural-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_stem_natural-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee09058ce4b90040f387fae7ac836f5e81d1177e --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_middle_stem_natural-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_stem_natural-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_middle_stem_natural-science_egy" +"task_alias": "middle stem natural-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_humanities_islamic-studies.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_humanities_islamic-studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..995aa28c2f55ba68c915c1feb69abab239dcb61d --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_humanities_islamic-studies.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_humanities_islamic-studies" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_na_humanities_islamic-studies_egy" +"task_alias": "na 
humanities islamic-studies" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_language_arabic-language-general.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_language_arabic-language-general.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8691250702eccbb58651aec19f77c0ec9cf9b419 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_language_arabic-language-general.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_language_arabic-language-general" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_egy" +"task": "AraDiCE_ArabicMMLU_na_language_arabic-language-general_egy" +"task_alias": "na language arabic-language-general" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_language_arabic-language-grammar.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_language_arabic-language-grammar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..453e41435dc1a85a2b7860bafebaf91a188bd307 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_language_arabic-language-grammar.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_language_arabic-language-grammar" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_egy" +"task": "AraDiCE_ArabicMMLU_na_language_arabic-language-grammar_egy" +"task_alias": "na language arabic-language-grammar" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_other_driving-test.yaml 
b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_other_driving-test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abc097210fdb0e90aaab2c344c40648daf9c4ba0 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_other_driving-test.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_other_driving-test" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_egy" +"task": "AraDiCE_ArabicMMLU_na_other_driving-test_egy" +"task_alias": "na other driving-test" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_other_general-knowledge.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_other_general-knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72af8e7f5310fd895fca58e74fdd5e28119084c9 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_na_other_general-knowledge.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_other_general-knowledge" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_egy" +"task": "AraDiCE_ArabicMMLU_na_other_general-knowledge_egy" +"task_alias": "na other general-knowledge" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_humanities_history.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_humanities_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e640faa54b05b6f7234896207926a28000c0dfc --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_humanities_history.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_humanities_history" +"description": "" +"fewshot_split": !!null "null" +"include": 
"_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_primary_humanities_history_egy" +"task_alias": "primary humanities history" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_humanities_islamic-studies.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_humanities_islamic-studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..120dfa14350e6025f94636be53828ef14ee5fafe --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_humanities_islamic-studies.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_humanities_islamic-studies" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_primary_humanities_islamic-studies_egy" +"task_alias": "primary humanities islamic-studies" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_language_arabic-language.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_language_arabic-language.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57c460a01b329f9a21605bf7121ef10162d597b1 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_language_arabic-language.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_language_arabic-language" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_egy" +"task": "AraDiCE_ArabicMMLU_primary_language_arabic-language_egy" +"task_alias": "primary language arabic-language" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git 
a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_other_general-knowledge.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_other_general-knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61314bf18263111a8449de0728acfe7237c20c15 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_other_general-knowledge.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_other_general-knowledge" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_egy" +"task": "AraDiCE_ArabicMMLU_primary_other_general-knowledge_egy" +"task_alias": "primary other general-knowledge" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_social-science_geography.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_social-science_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73b8deea7adfd2d9f02583c5b60a485e0c59c0fa --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_social-science_geography.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_social-science_geography" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_primary_social-science_geography_egy" +"task_alias": "primary social-science geography" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_social-science_social-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_social-science_social-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f03bb4ba0560e1a1ccd9ec1451bf87f605ef954 --- /dev/null +++ 
b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_social-science_social-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_social-science_social-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_primary_social-science_social-science_egy" +"task_alias": "primary social-science social-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_computer-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_computer-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e25856ebede95dda2f87f3ac6c5ae372d67d38e --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_computer-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_stem_computer-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_primary_stem_computer-science_egy" +"task_alias": "primary stem computer-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_math.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_math.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4e85ac27ff1da976b8839e033de7d23f0ea7ec8 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_math.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_stem_math" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_primary_stem_math_egy" +"task_alias": "primary stem math" 
+"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_natural-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_natural-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04591fcd81028726441b2bfb66d7314919c51e17 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_primary_stem_natural-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_stem_natural-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_primary_stem_natural-science_egy" +"task_alias": "primary stem natural-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_prof_humanities_law.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_prof_humanities_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fd3e166cb1a34171c3c7950b2f7218506acf905 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_prof_humanities_law.yaml @@ -0,0 +1,10 @@ +"dataset_name": "prof_humanities_law" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_egy" +"task": "AraDiCE_ArabicMMLU_prof_humanities_law_egy" +"task_alias": "prof humanities law" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_other_management.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_other_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b985e979f3ce280e83fda391f9ec95489df5bde --- /dev/null +++ 
b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_other_management.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_other_management" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_egy" +"task": "AraDiCE_ArabicMMLU_univ_other_management_egy" +"task_alias": "univ other management" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_accounting.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48ec0e75d852057e4080f3dc4ea84c417beafaf7 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_accounting.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_social-science_accounting" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_univ_social-science_accounting_egy" +"task_alias": "univ social-science accounting" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_economics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_economics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dd4dcc0a20dd1d7a555820245fa1ccfcbc5b258 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_economics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_social-science_economics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_univ_social-science_economics_egy" +"task_alias": "univ 
social-science economics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_political-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_political-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..671b0b3eb94699cce9440893538a8ad9622fa909 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_social-science_political-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_social-science_political-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_egy" +"task": "AraDiCE_ArabicMMLU_univ_social-science_political-science_egy" +"task_alias": "univ social-science political-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_stem_computer-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_stem_computer-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49e2e5b67c73b0e4316e8d7ca94eeb1549e357f6 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/AraDiCE_ArabicMMLU_univ_stem_computer-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_stem_computer-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_egy" +"task": "AraDiCE_ArabicMMLU_univ_stem_computer-science_egy" +"task_alias": "univ stem computer-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/_default_template_yaml b/lm_eval/tasks/aradice/ArabicMMLU/EGY/_default_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..6421888a23a376727abc20207dcb0fcd503a7de6 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/_default_template_yaml @@ -0,0 +1,20 @@ +dataset_path: "QCRI/AraDICE-ArabicMMLU-egy" +fewshot_config: + sampler: default +output_type: multiple_choice +process_docs: !function utils.process_docs +doc_to_text: "{{prompt}}" +doc_to_choice: choices +doc_to_target: target +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 0.0 diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/metrics.py b/lm_eval/tasks/aradice/ArabicMMLU/EGY/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..47e49ded46082847d73fce85e2db37556fafa877 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/metrics.py @@ -0,0 +1,25 @@ +from sklearn.metrics import f1_score + + +def macro_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="macro") + return fscore + + +def micro_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="micro") + return fscore + + +def weighted_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="weighted") + return fscore diff --git a/lm_eval/tasks/aradice/ArabicMMLU/EGY/utils.py b/lm_eval/tasks/aradice/ArabicMMLU/EGY/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..640b9a0f2ccb73c6784ea3c9749e2e490797d877 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/EGY/utils.py @@ -0,0 +1,87 @@ +level_ar = { + "Primary": "للمرحلة الابتدائية", + "Middle": "للمرحلة المتوسطة", + "High": "للمرحلة الثانوية", + 
"Univ": "للمرحلة الجامعية ", + "Prof": "للمحترفين", +} + +country_ar = { + "UAE": "في الإمارات", + "Egypt": "في مصر", + "Lebanon": "في لبنان", + "Jordan": "في الأردن", + "Kuwait": "في الكويت", + "KSA": "في السعودية", + "Palestine": "في فلسطين", + "Morocco": "في المغرب", +} + +subject_ar = { + "Islamic Studies": "في الدراسات إسلامية", + "Driving Test": "في اختبار القيادة", + "Natural Science": "في العلوم الطبيعية", + "History": "في مادة التاريخ", + "General Knowledge": "في المعرفة العامة", + "Law": "في القانون", + "Physics": "في الفيزياء", + "Social Science": "في العلوم الاجتماعية", + "Management": "في الإدارة", + "Arabic Language": "في اللغة العربية", + "Political Science": " في العلوم السياسية", + "Philosophy": "في الفلسفة", + "Accounting": "في المحاسبة", + "Computer Science": "في علوم الحاسوب", + "Geography": "في الجغرافيا", + "Math": "في الرياضيات", + "Biology": "في علم الأحياء", + "Economics": "في الاقتصاد", + "Arabic Language (General)": "في اللغة العربية (عام)", + "Arabic Language (Grammar)": "في اللغة العربية (النحو)", + "Civics": "في التربية المدنية", +} + + +alpa_ar = ["أ-", "ب-", "ج-", "د-", "و-"] +alpa_en = ["A-", "B-", "C-", "D-", "E-"] +all_choices = ["أ", "ب", "ج", "د", "و"] +all_choices_en = ["A", "B", "C", "D", "E"] + + +def process_docs(dataset): + def _helper(doc): + # modifies the contents of a single + # document in our dataset. + + PROMPT = "ده سؤال [MAIN_META_DATA]. 
اختار الإجابة الصحيحة!\n\nسؤال: [INPUT]\n[OPTION]" + PROMPT = f"{PROMPT}\n\nإجابة:" + alpa = alpa_ar + subject = subject_ar[doc["Subject"]] + level = " " + level_ar[doc["Level"]] if doc["Level"] else "" + country = " " + country_ar[doc["Country"]] if doc["Country"] else "" + main_meta_data = f"{subject}{level}{country}" + + question = ( + f"{doc['context']}\n\n{doc['question']}" + if doc["context"] + else doc["question"] + ) + options = [] + for i, opt in enumerate(["A", "B", "C", "D", "E"]): + if opt not in doc["options"] or doc["options"][opt] is None: + break + options.append(f"{alpa[i]} {doc['options'][opt]}") + + doc["prompt"] = ( + PROMPT.replace("[MAIN_META_DATA]", main_meta_data) + .replace("[INPUT]", question) + .replace("[OPTION]", "\n".join(options)) + ) + + doc["choices"] = all_choices[: len(options)] + + doc["target"] = ["A", "B", "C", "D", "E"].index(doc["Answer Key"]) + + return doc + + return dataset.map(_helper) # returns back a datasets.Dataset object diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df64389d8ece88b80a4029845f09f810131da7fe --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU.yaml @@ -0,0 +1,12 @@ +group: AraDiCE_ArabicMMLU_lev +task: +- AraDiCE_ArabicMMLU_humanities_lev +- AraDiCE_ArabicMMLU_language_lev +- AraDiCE_ArabicMMLU_social-science_lev +- AraDiCE_ArabicMMLU_stem_lev +- AraDiCE_ArabicMMLU_other_lev +aggregate_metric_list: + - metric: acc + weight_by_size: True + - metric: acc_norm + weight_by_size: True diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_history.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbe1838c0f9ad4c741b58b457771f01c3e109fad --- /dev/null +++ 
b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_history.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_humanities_history" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_high_humanities_history_lev" +"task_alias": "high humanities history" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_islamic-studies.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_islamic-studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e1d874eaf0ea69031c06aa947bac25839286b69 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_islamic-studies.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_humanities_islamic-studies" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_high_humanities_islamic-studies_lev" +"task_alias": "high humanities islamic-studies" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_philosophy.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..752a95f3db174d00f2de8c13fafe5512aede2467 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_humanities_philosophy.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_humanities_philosophy" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_high_humanities_philosophy_lev" +"task_alias": "high humanities 
philosophy" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_language_arabic-language.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_language_arabic-language.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27d14f96d16d01469dfe2b9b054c9e3223f2e421 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_language_arabic-language.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_language_arabic-language" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_lev" +"task": "AraDiCE_ArabicMMLU_high_language_arabic-language_lev" +"task_alias": "high language arabic-language" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_civics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_civics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29d1a5205ec00a1d74b01c207ce19990b05c692a --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_civics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_social-science_civics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_high_social-science_civics_lev" +"task_alias": "high social-science civics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_economics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_economics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..378587a8feba7b2fb6745425025a5748c6cd634c --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_economics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_social-science_economics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_high_social-science_economics_lev" +"task_alias": "high social-science economics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_geography.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11668a5f0b10e588e86da989e40863e4d31c6e32 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_social-science_geography.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_social-science_geography" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_high_social-science_geography_lev" +"task_alias": "high social-science geography" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_biology.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80900b2f52c50ca7a3c0b0657a22cb81be951fbd --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_biology.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_stem_biology" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_lev" +"task": 
"AraDiCE_ArabicMMLU_high_stem_biology_lev" +"task_alias": "high stem biology" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_computer-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_computer-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eca96f2c6edd5c1c67e828dec9a071a6b5b733d3 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_computer-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_stem_computer-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_lev" +"task": "AraDiCE_ArabicMMLU_high_stem_computer-science_lev" +"task_alias": "high stem computer-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_physics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d21bcc69ffa6a500d64d87b424ae720f0177e26 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_high_stem_physics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "high_stem_physics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_lev" +"task": "AraDiCE_ArabicMMLU_high_stem_physics_lev" +"task_alias": "high stem physics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_humanities_history.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_humanities_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dd3cfb9e1a3db0af98b158759618fa994792437 
--- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_humanities_history.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_humanities_history" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_middle_humanities_history_lev" +"task_alias": "middle humanities history" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_humanities_islamic-studies.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_humanities_islamic-studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e5490e4803a62886e5fa8bd342ee32697fbd96d --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_humanities_islamic-studies.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_humanities_islamic-studies" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_middle_humanities_islamic-studies_lev" +"task_alias": "middle humanities islamic-studies" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_language_arabic-language.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_language_arabic-language.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b67e3be59c3c5a13504004fdcca4f1b4c3df397d --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_language_arabic-language.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_language_arabic-language" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_lev" +"task": 
"AraDiCE_ArabicMMLU_middle_language_arabic-language_lev" +"task_alias": "middle language arabic-language" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_other_general-knowledge.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_other_general-knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd43ebe3ddabeb27f725fc41a8ea42b3a8a562a1 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_other_general-knowledge.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_other_general-knowledge" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_lev" +"task": "AraDiCE_ArabicMMLU_middle_other_general-knowledge_lev" +"task_alias": "middle other general-knowledge" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_civics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_civics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a18665cf01c3af01b71e61e11586fa3e91008d43 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_civics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_social-science_civics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_middle_social-science_civics_lev" +"task_alias": "middle social-science civics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_economics.yaml 
b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_economics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1de265b6b9f530aaea9d7766a1ea72e8fbbc6d8 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_economics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_social-science_economics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_middle_social-science_economics_lev" +"task_alias": "middle social-science economics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_geography.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19083eb00c9cd48d780ebbd551bc34a84de7d611 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_geography.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_social-science_geography" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_middle_social-science_geography_lev" +"task_alias": "middle social-science geography" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_social-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_social-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c7d19c7ea9817217ee11c2423e7c2905b8ecea7 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_social-science_social-science.yaml 
@@ -0,0 +1,10 @@ +"dataset_name": "middle_social-science_social-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_middle_social-science_social-science_lev" +"task_alias": "middle social-science social-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_stem_computer-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_stem_computer-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..583e29b103756dc04cd851e4b77302596a1637c9 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_stem_computer-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_stem_computer-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_lev" +"task": "AraDiCE_ArabicMMLU_middle_stem_computer-science_lev" +"task_alias": "middle stem computer-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_stem_natural-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_stem_natural-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1904d2c8785b257e7aa8b2ee0021fa9a7ac0768 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_middle_stem_natural-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "middle_stem_natural-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_lev" +"task": "AraDiCE_ArabicMMLU_middle_stem_natural-science_lev" +"task_alias": "middle stem natural-science" +"test_split": "test" +"training_split": !!null "null" 
+"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_humanities_islamic-studies.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_humanities_islamic-studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac0bfe8a061acebf853a6bc9908c70f0d8550ea1 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_humanities_islamic-studies.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_humanities_islamic-studies" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_na_humanities_islamic-studies_lev" +"task_alias": "na humanities islamic-studies" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_language_arabic-language-general.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_language_arabic-language-general.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f80e6e93e4007c4e3de7b6c885155bfc7b71f7bf --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_language_arabic-language-general.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_language_arabic-language-general" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_lev" +"task": "AraDiCE_ArabicMMLU_na_language_arabic-language-general_lev" +"task_alias": "na language arabic-language-general" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_language_arabic-language-grammar.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_language_arabic-language-grammar.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..af3943d9a8f59bf10c1decd7d56a497d45312cb6 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_language_arabic-language-grammar.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_language_arabic-language-grammar" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_lev" +"task": "AraDiCE_ArabicMMLU_na_language_arabic-language-grammar_lev" +"task_alias": "na language arabic-language-grammar" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_other_driving-test.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_other_driving-test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0af542f0d6ab8de6be76a898022ad4adb242520d --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_other_driving-test.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_other_driving-test" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_lev" +"task": "AraDiCE_ArabicMMLU_na_other_driving-test_lev" +"task_alias": "na other driving-test" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_other_general-knowledge.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_other_general-knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c5669cf07cf6f2f05a10e40fe30208c3f857f24 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_na_other_general-knowledge.yaml @@ -0,0 +1,10 @@ +"dataset_name": "na_other_general-knowledge" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_lev" +"task": 
"AraDiCE_ArabicMMLU_na_other_general-knowledge_lev" +"task_alias": "na other general-knowledge" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_humanities_history.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_humanities_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be32d433f76b171a1f076dec90dacfea7c11ea3f --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_humanities_history.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_humanities_history" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_primary_humanities_history_lev" +"task_alias": "primary humanities history" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_humanities_islamic-studies.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_humanities_islamic-studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ae53b80ee7f011f12afbaee1d781194d228e41e --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_humanities_islamic-studies.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_humanities_islamic-studies" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_primary_humanities_islamic-studies_lev" +"task_alias": "primary humanities islamic-studies" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_language_arabic-language.yaml 
b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_language_arabic-language.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15575b0513b242eb62e9c2b3a5dfce5351f5022f --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_language_arabic-language.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_language_arabic-language" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_language_lev" +"task": "AraDiCE_ArabicMMLU_primary_language_arabic-language_lev" +"task_alias": "primary language arabic-language" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_other_general-knowledge.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_other_general-knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07b6692115f74d81f741d39e02914b980c66863a --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_other_general-knowledge.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_other_general-knowledge" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_lev" +"task": "AraDiCE_ArabicMMLU_primary_other_general-knowledge_lev" +"task_alias": "primary other general-knowledge" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_social-science_geography.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_social-science_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b43c49035cbe43d47d16f1783681b0ad0ceaafb0 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_social-science_geography.yaml @@ -0,0 +1,10 @@ 
+"dataset_name": "primary_social-science_geography" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_primary_social-science_geography_lev" +"task_alias": "primary social-science geography" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_social-science_social-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_social-science_social-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f9f093415267e3a2648bf27c494ba20154babba --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_social-science_social-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_social-science_social-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_primary_social-science_social-science_lev" +"task_alias": "primary social-science social-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_computer-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_computer-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a79f2e7a2f4c54ad67b97345abaa91d0857bce0 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_computer-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_stem_computer-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_lev" +"task": "AraDiCE_ArabicMMLU_primary_stem_computer-science_lev" +"task_alias": "primary stem computer-science" +"test_split": 
"test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_math.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_math.yaml new file mode 100644 index 0000000000000000000000000000000000000000..048c95096e7f4d0b9550ca511469ba08953a30df --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_math.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_stem_math" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_lev" +"task": "AraDiCE_ArabicMMLU_primary_stem_math_lev" +"task_alias": "primary stem math" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_natural-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_natural-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d7404ae7e7b6c172f35c9ec16caa942e2516f7b --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_primary_stem_natural-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "primary_stem_natural-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_stem_lev" +"task": "AraDiCE_ArabicMMLU_primary_stem_natural-science_lev" +"task_alias": "primary stem natural-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_prof_humanities_law.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_prof_humanities_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c50cb9d913ec092ebd3dbbafc7165e786d81ef1 --- /dev/null +++ 
b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_prof_humanities_law.yaml @@ -0,0 +1,10 @@ +"dataset_name": "prof_humanities_law" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_humanities_lev" +"task": "AraDiCE_ArabicMMLU_prof_humanities_law_lev" +"task_alias": "prof humanities law" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_other_management.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_other_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31b79fd0c14a01dd6f2d7e79c0066b15edea1136 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_other_management.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_other_management" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_other_lev" +"task": "AraDiCE_ArabicMMLU_univ_other_management_lev" +"task_alias": "univ other management" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_accounting.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc0cb68266fbcecd39c3a92b21dbd8223bf0f030 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_accounting.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_social-science_accounting" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_univ_social-science_accounting_lev" +"task_alias": "univ social-science accounting" +"test_split": "test" +"training_split": 
!!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_economics.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_economics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..daec1b37a648c75ffc38bd531c4ec8a2c7365c9f --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_economics.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_social-science_economics" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_univ_social-science_economics_lev" +"task_alias": "univ social-science economics" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_political-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_political-science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69f63ca4d22e1c0cd63a00f5832844b5b89bc90 --- /dev/null +++ b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_social-science_political-science.yaml @@ -0,0 +1,10 @@ +"dataset_name": "univ_social-science_political-science" +"description": "" +"fewshot_split": !!null "null" +"include": "_default_template_yaml" +"tag": "AraDiCE_ArabicMMLU_social-science_lev" +"task": "AraDiCE_ArabicMMLU_univ_social-science_political-science_lev" +"task_alias": "univ social-science political-science" +"test_split": "test" +"training_split": !!null "null" +"validation_split": !!null "null" diff --git a/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_stem_computer-science.yaml b/lm_eval/tasks/aradice/ArabicMMLU/LEV/AraDiCE_ArabicMMLU_univ_stem_computer-science.yaml new file mode 100644 index 
def macro_f1_score(items):
    """Aggregate a list of results into a macro-averaged F1 score.

    ``items`` is unzipped column-wise; column 0 is handed to ``f1_score`` as
    the gold labels and column 1 as the predictions.
    NOTE(review): presumably every item is a ``(gold, pred)`` pair produced by
    the evaluation harness -- confirm against the caller.
    """
    columns = tuple(zip(*items))
    return f1_score(columns[0], columns[1], average="macro")
# Arabic phrases describing the education level; spliced into the prompt
# header. Whitespace inside the values is kept exactly as authored because it
# ends up verbatim in the prompt text.
level_ar = {
    "Primary": "للمرحلة الابتدائية",
    "Middle": "للمرحلة المتوسطة",
    "High": "للمرحلة الثانوية",
    "Univ": "للمرحلة الجامعية ",
    "Prof": "للمحترفين",
}

# Arabic phrases naming the country the question originates from.
country_ar = {
    "UAE": "بالإمارات",
    "Egypt": "بمصر",
    "Lebanon": "بلبنان",
    "Jordan": "بالأردن",
    "Kuwait": "بالكويت",
    "KSA": "بالسعودية",
    "Palestine": "بفلسطين",
    "Morocco": "بالمغرب",
}

# Arabic phrases naming the subject, keyed by the dataset's "Subject" value.
subject_ar = {
    "Islamic Studies": "عن الدراسات إسلامية",
    "Driving Test": "عن فحص السواقة",
    "Natural Science": "عن العلوم الطبيعية",
    "History": "تاريخ",
    "General Knowledge": "معرفة عامة",
    "Law": "عن القانون",
    "Physics": "فيزياء",
    "Social Science": "علوم اجتماعية",
    "Management": "عن الإدارة",
    "Arabic Language": "عن اللغة العربية",
    "Political Science": " عن العلوم السياسية",
    "Philosophy": "فلسفة",
    "Accounting": "محاسبة",
    "Computer Science": "عن علوم الحاسوب",
    "Geography": "جغرافيا",
    "Math": "رياضيات",
    "Biology": "بيولوجي",
    "Economics": "اقتصاد",
    "Arabic Language (General)": "لغة العربية (عام)",
    "Arabic Language (Grammar)": "لغة العربية (نحو)",
    "Civics": "تربية مدنية",
}

# Option markers shown in the prompt (Arabic / English) and the bare letters
# used as answer choices.
alpa_ar = ["أ-", "ب-", "ج-", "د-", "و-"]
alpa_en = ["A-", "B-", "C-", "D-", "E-"]
all_choices = ["أ", "ب", "ج", "د", "و"]
all_choices_en = ["A", "B", "C", "D", "E"]

# Prompt layout: metadata header, question, one option per line, answer cue.
# It is filled with a single ``str.format`` pass so that text coming from the
# dataset is never scanned for placeholders.
_PROMPT_TEMPLATE = (
    "هيدا سؤال {meta}. نقي الجواب الصح!\n\nسؤال: {question}\n{options}\n\nالجواب:"
)


def process_docs(dataset):
    """Add ``prompt``, ``choices`` and ``target`` fields to every document.

    Args:
        dataset: Object exposing ``map(fn)``; ``fn`` is applied to each
            document. NOTE(review): presumably a Hugging Face ``Dataset`` --
            confirm against the task loader.

    Returns:
        Whatever ``dataset.map`` returns for the per-document helper.

    Raises:
        KeyError: If a document's ``Subject``, or a non-empty ``Level`` or
            ``Country``, has no entry in the lookup tables above.
        ValueError: If ``Answer Key`` is not one of ``A``-``E``.
    """

    def _helper(doc):
        # Header metadata: the subject is mandatory, level and country are
        # appended (space-separated) only when the document provides them.
        subject = subject_ar[doc["Subject"]]
        level = " " + level_ar[doc["Level"]] if doc["Level"] else ""
        country = " " + country_ar[doc["Country"]] if doc["Country"] else ""
        main_meta_data = f"{subject}{level}{country}"

        question = (
            f"{doc['context']}\n\n{doc['question']}"
            if doc["context"]
            else doc["question"]
        )

        # Options are consumed in A..E order and stop at the first missing
        # or null entry, so ``choices`` below always matches what is shown.
        options = []
        for letter, marker in zip(all_choices_en, alpa_ar):
            if letter not in doc["options"] or doc["options"][letter] is None:
                break
            options.append(f"{marker} {doc['options'][letter]}")

        # One-pass formatting: a question that happens to contain text such
        # as "[OPTION]" is reproduced verbatim instead of being substituted.
        doc["prompt"] = _PROMPT_TEMPLATE.format(
            meta=main_meta_data,
            question=question,
            options="\n".join(options),
        )

        doc["choices"] = all_choices[: len(options)]

        doc["target"] = all_choices_en.index(doc["Answer Key"])

        return doc

    return dataset.map(_helper)
Additionally, we introduce the first-ever fine-grained benchmark designed to evaluate cultural awareness across the Gulf, Egypt, and Levant regions, providing a novel dimension to LLM evaluation. Our findings demonstrate that while Arabic-specific models like Jais and AceGPT outperform multilingual models on dialectal tasks, significant challenges persist in dialect identification, generation, and translation. This work contributes ~45K post-edited samples, a cultural benchmark, and highlights the importance of tailored training to improve LLM performance in capturing the nuances of diverse Arabic dialects and cultural contexts. We will release the dialectal translation models and benchmarks curated in this study. + +**Homepage:** +https://huggingface.co/datasets/QCRI/AraDiCE + + + +### Citation + +``` +@article{mousi2024aradicebenchmarksdialectalcultural, + title={{AraDiCE}: Benchmarks for Dialectal and Cultural Capabilities in LLMs}, + author={Basel Mousi and Nadir Durrani and Fatema Ahmad and Md. Arid Hasan and Maram Hasanain and Tameem Kabbani and Fahim Dalvi and Shammur Absar Chowdhury and Firoj Alam}, + year={2024}, + publisher={arXiv:2409.11404}, + url={https://arxiv.org/abs/2409.11404}, +} +``` + +### Groups, Tags, and Tasks + +#### Groups + +* `AraDiCE`: Overall results for all tasks associated with different datasets. + + +#### Tasks + +* `aradice`: Overall results for all tasks associated with different datasets. +* `arabicmmlu`: TODO + + +### Checklist + +* [x] Is the task an existing benchmark in the literature? + * [x] Have you referenced the original paper that introduced the task? + * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [x] Is the "Main" variant of this task clearly denoted? 
+* [x] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [x] Have you noted which, if any, published evaluation setups are matched by this variant? diff --git a/lm_eval/tasks/aradice/aradice.yaml b/lm_eval/tasks/aradice/aradice.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c7759f2c38a88289050771d2b044ebc6a1abf2f --- /dev/null +++ b/lm_eval/tasks/aradice/aradice.yaml @@ -0,0 +1,30 @@ +group: AraDiCE +task: +- AraDiCE_ArabicMMLU_lev +- AraDiCE_ArabicMMLU_egy +- AraDiCE_boolq_egy +- AraDiCE_boolq_eng +- AraDiCE_boolq_lev +- AraDiCE_boolq_msa +- AraDiCE_egypt_cultural +- AraDiCE_jordan_cultural +- AraDiCE_lebanon_cultural +- AraDiCE_palestine_cultural +- AraDiCE_qatar_cultural +- AraDiCE_syria_cultural +- AraDiCE_openbookqa_egy +- AraDiCE_openbookqa_eng +- AraDiCE_openbookqa_lev +- AraDiCE_openbookqa_msa +- AraDiCE_piqa_egy +- AraDiCE_piqa_eng +- AraDiCE_piqa_lev +- AraDiCE_piqa_msa +- AraDiCE_truthfulqa_mc1_egy +- AraDiCE_truthfulqa_mc1_eng +- AraDiCE_truthfulqa_mc1_lev +- AraDiCE_truthfulqa_mc1_msa +- AraDiCE_winogrande_egy +- AraDiCE_winogrande_eng +- AraDiCE_winogrande_lev +- AraDiCE_winogrande_msa diff --git a/lm_eval/tasks/aradice/boolq/EGY/boolq_egy.yaml b/lm_eval/tasks/aradice/boolq/EGY/boolq_egy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c481c24a750c83d689a6a1dd7e3efd233e797193 --- /dev/null +++ b/lm_eval/tasks/aradice/boolq/EGY/boolq_egy.yaml @@ -0,0 +1,25 @@ +task: AraDiCE_boolq_egy +dataset_path: QCRI/AraDiCE-BoolQ +dataset_name: BoolQ-egy +output_type: multiple_choice +training_split: null +validation_split: null +test_split: test +process_docs: !function utils.process_docs +doc_to_text: "{{passage}}\nسؤال: {{question}}؟\nجواب:" +doc_to_target: target +doc_to_choice: ["لا", "نعم"] +should_decontaminate: true +doc_to_decontamination_query: passage +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: 
# Maps the dataset's answer (boolean or lowercase string) to the Arabic
# choice text used as the target.
egy_answer_mapping = {"true": "نعم", "false": "لا", True: "نعم", False: "لا"}


def process_docs(dataset):
    """Normalise each document's question and attach its ``target`` answer.

    For every document the ``question`` is trimmed and loses at most one
    trailing question mark (ASCII ``?`` or Arabic ``؟``); ``target`` is the
    ``egy_answer_mapping`` entry for the document's ``answer``.

    Args:
        dataset: Object exposing ``map(fn)``. NOTE(review): presumably a
            Hugging Face ``Dataset`` -- confirm against the task loader.

    Returns:
        The result of ``dataset.map`` applied with the per-document helper.

    Raises:
        KeyError: If ``answer`` is not a key of ``egy_answer_mapping``.
    """

    def _without_question_mark(raw):
        cleaned = raw.strip()
        if cleaned.endswith(("?", "؟")):
            cleaned = cleaned[:-1]
        # Strip again: whitespace may have preceded the removed mark.
        return cleaned.strip()

    def _prepare(doc):
        doc["question"] = _without_question_mark(doc["question"])
        doc["target"] = egy_answer_mapping[doc["answer"]]
        return doc

    return dataset.map(_prepare)
# Maps the dataset's answer (boolean or lowercase string) to the English
# choice text used as the target.
en_answer_mapping = {"true": "yes", "false": "no", True: "yes", False: "no"}


def process_docs(dataset):
    """Normalise each document's question and attach its ``target`` answer.

    For every document the ``question`` is trimmed and loses at most one
    trailing question mark (ASCII ``?`` or Arabic ``؟``); ``target`` is the
    ``en_answer_mapping`` entry for the document's ``answer``.

    Args:
        dataset: Object exposing ``map(fn)``. NOTE(review): presumably a
            Hugging Face ``Dataset`` -- confirm against the task loader.

    Returns:
        The result of ``dataset.map`` applied with the per-document helper.

    Raises:
        KeyError: If ``answer`` is not a key of ``en_answer_mapping``.
    """

    def _without_question_mark(raw):
        cleaned = raw.strip()
        if cleaned.endswith(("?", "؟")):
            cleaned = cleaned[:-1]
        # Strip again: whitespace may have preceded the removed mark.
        return cleaned.strip()

    def _prepare(doc):
        doc["question"] = _without_question_mark(doc["question"])
        doc["target"] = en_answer_mapping[doc["answer"]]
        return doc

    return dataset.map(_prepare)
# Maps the dataset's answer (boolean or lowercase string) to the Arabic
# choice text used as the target.
lev_answer_mapping = {"true": "نعم", "false": "لا", True: "نعم", False: "لا"}


def process_docs(dataset):
    """Normalise each document's question and attach its ``target`` answer.

    For every document the ``question`` is trimmed and loses at most one
    trailing question mark (ASCII ``?`` or Arabic ``؟``); ``target`` is the
    ``lev_answer_mapping`` entry for the document's ``answer``.

    Args:
        dataset: Object exposing ``map(fn)``. NOTE(review): presumably a
            Hugging Face ``Dataset`` -- confirm against the task loader.

    Returns:
        The result of ``dataset.map`` applied with the per-document helper.

    Raises:
        KeyError: If ``answer`` is not a key of ``lev_answer_mapping``.
    """

    def _without_question_mark(raw):
        cleaned = raw.strip()
        if cleaned.endswith(("?", "؟")):
            cleaned = cleaned[:-1]
        # Strip again: whitespace may have preceded the removed mark.
        return cleaned.strip()

    def _prepare(doc):
        doc["question"] = _without_question_mark(doc["question"])
        doc["target"] = lev_answer_mapping[doc["answer"]]
        return doc

    return dataset.map(_prepare)
# Maps the dataset's answer (boolean or lowercase string) to the Arabic
# choice text used as the target.
msa_answer_mapping = {"true": "نعم", "false": "لا", True: "نعم", False: "لا"}


def process_docs(dataset):
    """Normalise each document's question and attach its ``target`` answer.

    For every document the ``question`` is trimmed and loses at most one
    trailing question mark (ASCII ``?`` or Arabic ``؟``); ``target`` is the
    ``msa_answer_mapping`` entry for the document's ``answer``.

    Args:
        dataset: Object exposing ``map(fn)``. NOTE(review): presumably a
            Hugging Face ``Dataset`` -- confirm against the task loader.

    Returns:
        The result of ``dataset.map`` applied with the per-document helper.

    Raises:
        KeyError: If ``answer`` is not a key of ``msa_answer_mapping``.
    """

    def _without_question_mark(raw):
        cleaned = raw.strip()
        if cleaned.endswith(("?", "؟")):
            cleaned = cleaned[:-1]
        # Strip again: whitespace may have preceded the removed mark.
        return cleaned.strip()

    def _prepare(doc):
        doc["question"] = _without_question_mark(doc["question"])
        doc["target"] = msa_answer_mapping[doc["answer"]]
        return doc

    return dataset.map(_prepare)
+training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +process_docs: !function utils.process_docs +doc_to_text: "سؤال : {{Question}}\nإجابة :" +doc_to_target: 0 +doc_to_choice: choices +should_decontaminate: true +doc_to_decontamination_query: Question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/cultural-benchmark/jordan.yaml b/lm_eval/tasks/aradice/cultural-benchmark/jordan.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc2b3db5e4771194577d8ae6b05ad5bf454c4afd --- /dev/null +++ b/lm_eval/tasks/aradice/cultural-benchmark/jordan.yaml @@ -0,0 +1,25 @@ +task: AraDiCE_jordan_cultural +dataset_path: QCRI/AraDiCE-Culture +dataset_name: Jordan +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +process_docs: !function utils.process_docs +doc_to_text: "سؤال : {{Question}}\nإجابة :" +doc_to_target: 0 +doc_to_choice: choices +should_decontaminate: true +doc_to_decontamination_query: Question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/cultural-benchmark/lebanon.yaml b/lm_eval/tasks/aradice/cultural-benchmark/lebanon.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2811422fca94f7354ac9e3f04b86e641bdb2d1a --- /dev/null +++ b/lm_eval/tasks/aradice/cultural-benchmark/lebanon.yaml @@ -0,0 +1,25 @@ +task: AraDiCE_lebanon_cultural +dataset_path: QCRI/AraDiCE-Culture +dataset_name: Lebanon +training_split: null +validation_split: null +test_split: test +output_type: 
def _f1_from_items(items, average):
    """Unzip ``items`` column-wise and score column 0 (golds) against column 1 (preds)."""
    columns = list(zip(*items))
    return f1_score(columns[0], columns[1], average=average)


def macro_f1_score(items):
    """Return the macro-averaged F1 over ``items``.

    NOTE(review): presumably each item is a ``(gold, pred)`` pair supplied by
    the evaluation harness -- confirm against the caller.
    """
    return _f1_from_items(items, "macro")


def micro_f1_score(items):
    """Return the micro-averaged F1 over ``items`` (same input as ``macro_f1_score``)."""
    return _f1_from_items(items, "micro")


def weighted_f1_score(items):
    """Return the support-weighted F1 over ``items`` (same input as ``macro_f1_score``)."""
    return _f1_from_items(items, "weighted")
{{Question}}\nإجابة :" +doc_to_target: 0 +doc_to_choice: choices +should_decontaminate: true +doc_to_decontamination_query: Question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/cultural-benchmark/qatar.yaml b/lm_eval/tasks/aradice/cultural-benchmark/qatar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9df210076abad8f9e97e1ae7691844f5a22c5c9 --- /dev/null +++ b/lm_eval/tasks/aradice/cultural-benchmark/qatar.yaml @@ -0,0 +1,25 @@ +task: AraDiCE_qatar_cultural +dataset_path: QCRI/AraDiCE-Culture +dataset_name: Qatar +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +process_docs: !function utils.process_docs +doc_to_text: "سؤال : {{Question}}\nإجابة :" +doc_to_target: 0 +doc_to_choice: choices +should_decontaminate: true +doc_to_decontamination_query: Question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/cultural-benchmark/syria.yaml b/lm_eval/tasks/aradice/cultural-benchmark/syria.yaml new file mode 100644 index 0000000000000000000000000000000000000000..faf957c22e3b39b5b97b29eea3effbca578bdf83 --- /dev/null +++ b/lm_eval/tasks/aradice/cultural-benchmark/syria.yaml @@ -0,0 +1,25 @@ +task: AraDiCE_syria_cultural +dataset_path: QCRI/AraDiCE-Culture +dataset_name: Syria +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +process_docs: !function utils.process_docs +doc_to_text: "سؤال : {{Question}}\nإجابة :" +doc_to_target: 0 +doc_to_choice: choices +should_decontaminate: true 
def process_docs(dataset):
    """Attach a ``choices`` list to every document.

    ``choices`` holds the values of the ``Option A``, ``Option B`` and
    ``Option C`` fields, in that order.

    Args:
        dataset: Object exposing ``map(fn)``. NOTE(review): presumably a
            Hugging Face ``Dataset`` -- confirm against the task loader.

    Returns:
        The result of ``dataset.map`` applied with the per-document helper.

    Raises:
        KeyError: If a document lacks one of the three option fields.
    """
    option_fields = ("Option A", "Option B", "Option C")

    def _with_choices(doc):
        doc["choices"] = [doc[field] for field in option_fields]
        return doc

    return dataset.map(_with_choices)
+task: AraDiCE_openbookqa_egy +dataset_path: QCRI/AraDiCE-OpenBookQA +dataset_name: OBQA-egy +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +doc_to_choice: !function utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: "{{question.stem}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/openbookqa/openbookqa_eng.yaml b/lm_eval/tasks/aradice/openbookqa/openbookqa_eng.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f0adcc6562ff89a9b542cbf8725cc3662c05278 --- /dev/null +++ b/lm_eval/tasks/aradice/openbookqa/openbookqa_eng.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_openbookqa_eng +dataset_path: QCRI/AraDiCE-OpenBookQA +dataset_name: OBQA-eng +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +doc_to_choice: !function utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: "{{question.stem}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/openbookqa/openbookqa_lev.yaml b/lm_eval/tasks/aradice/openbookqa/openbookqa_lev.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1386b80178482f97fefb43e8d6c4a65859222bb1 --- /dev/null +++ b/lm_eval/tasks/aradice/openbookqa/openbookqa_lev.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_openbookqa_lev +dataset_path: 
def doc_to_target(doc):
    """Return the index of the gold answer among the question's choices.

    The document's ``answerKey`` (leading whitespace removed) is looked up in
    the list of choice ``label`` values.

    Returns:
        The zero-based position of the matching choice, or ``None`` when the
        answer key is absent, is not a string, or matches no label. A warning
        is logged in that case so the bad document can be identified.
    """
    import logging

    labels = [choice["label"] for choice in doc["question"]["choices"]]

    try:
        return labels.index(doc["answerKey"].lstrip())
    except (KeyError, AttributeError, ValueError) as err:
        # Best-effort by design: an unresolved key yields no target instead of
        # aborting the run. Only the failures this lookup can produce are
        # handled; anything else is a real bug and should surface.
        logging.getLogger(__name__).warning(
            "Could not resolve the answer key against choice labels %r: %r",
            labels,
            err,
        )
        return None


def doc_to_choice(doc):
    """Return the ``text`` of every choice of the question, in order."""
    return [choice["text"] for choice in doc["question"]["choices"]]


def doc_to_text(doc):
    """Return the question stem with surrounding whitespace removed."""
    return doc["question"]["stem"].strip()
metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/piqa/piqa_eng.yaml b/lm_eval/tasks/aradice/piqa/piqa_eng.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2967f3d518937e17a8152259404a6fd2b18858b --- /dev/null +++ b/lm_eval/tasks/aradice/piqa/piqa_eng.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_piqa_eng +dataset_path: QCRI/AraDiCE-PIQA +dataset_name: PIQA-eng +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: "سؤال : {{goal}}\nإجابة :" +doc_to_target: label +doc_to_choice: "{{[sol1, sol2]}}" +should_decontaminate: true +doc_to_decontamination_query: goal +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/piqa/piqa_lev.yaml b/lm_eval/tasks/aradice/piqa/piqa_lev.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5853912d1d8ad04983bb17ccca58e4dfb43b916 --- /dev/null +++ b/lm_eval/tasks/aradice/piqa/piqa_lev.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_piqa_lev +dataset_path: QCRI/AraDiCE-PIQA +dataset_name: PIQA-lev +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: "سؤال : {{goal}}\nإجابة :" +doc_to_target: label +doc_to_choice: "{{[sol1, sol2]}}" +should_decontaminate: true +doc_to_decontamination_query: goal +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/piqa/piqa_msa.yaml b/lm_eval/tasks/aradice/piqa/piqa_msa.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..3495f3c9a7ebee9439b07771b0df476f573a4249 --- /dev/null +++ b/lm_eval/tasks/aradice/piqa/piqa_msa.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_piqa_msa +dataset_path: QCRI/AraDiCE-PIQA +dataset_name: PIQA-msa +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: "سؤال : {{goal}}\nإجابة :" +doc_to_target: label +doc_to_choice: "{{[sol1, sol2]}}" +should_decontaminate: true +doc_to_decontamination_query: goal +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/truthfulqa_mcq/metrics.py b/lm_eval/tasks/aradice/truthfulqa_mcq/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..47e49ded46082847d73fce85e2db37556fafa877 --- /dev/null +++ b/lm_eval/tasks/aradice/truthfulqa_mcq/metrics.py @@ -0,0 +1,25 @@ +from sklearn.metrics import f1_score + + +def macro_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="macro") + return fscore + + +def micro_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="micro") + return fscore + + +def weighted_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="weighted") + return fscore diff --git a/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_egy.yaml b/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_egy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14f18b8f3b380d3705f47b7b0af113deef2cd1ee --- /dev/null +++ b/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_egy.yaml 
@@ -0,0 +1,39 @@ +task: AraDiCE_truthfulqa_mc1_egy +dataset_path: QCRI/AraDiCE-TruthfulQA +dataset_name: TruthfulQA-egy +output_type: multiple_choice +training_split: null +validation_split: null +test_split: test +doc_to_text: "\ + {% set prompt_qa = '\ + س: إيه هو متوسط العمر المتوقع للإنسان في الولايات المتحدة؟\n\ + ج: متوسط العمر المتوقع للإنسان في الولايات المتحدة هو 78 عامًا\n\n\ + س: مين اللي كان رئيس الولايات المتحدة سنة 1955م؟\n\ + ج: كان دوايت دي آيزنهاور رئيس الولايات المتحدة سنة 1955م\n\n\ + س: وكان بينتمي لحزب إيه؟\n\ + ج: كان بينتمي للحزب الجمهوري\n\n\ + س: إيه هو الجذر التربيعي للموز؟\n\ + ج: معنديش تعلييء\n\n\ + س: هو التلسكوب بيشتغل ازاي؟\n\ + ج: التلسكوب بيستخدم عدسات أو مرايات لتركيز الضوء وجعل الأجسام تبان أقرب\n\n\ + س: هي الألعاب الأولمبية سنة 1992م كانت فين؟ \n\ + ج: الألعاب الأولمبية سنة 1992م كانت في برشلونة، إسبانيا\ + '%}\ + {{prompt_qa + '\n\nس: ' + question + '\nج:'}}" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_eng.yaml b/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_eng.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9842dd309a4290045e843a5c8512d2a63b96d90b --- /dev/null +++ b/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_eng.yaml @@ -0,0 +1,40 @@ +task: AraDiCE_truthfulqa_mc1_eng +dataset_path: QCRI/AraDiCE-TruthfulQA +dataset_name: TruthfulQA-eng +output_type: multiple_choice +training_split: null +validation_split: null +test_split: test +num_fewshot: 0 +doc_to_text: "\ + {% set prompt_qa = '\ + Q: What is human life expectancy in the United States?\n\ + A: Human life expectancy in the United 
States is 78 years.\n\n\ + Q: Who was president of the United States in 1955?\n\ + A: Dwight D. Eisenhower was president of the United States in 1955.\n\n\ + Q: Which party did he belong to?\n\ + A: He belonged to the Republican Party.\n\n\ + Q: What is the square root of banana?\n\ + A: I have no comment.\n\n\ + Q: How does a telescope work?\n\ + A: Telescopes use lenses or mirrors to focus light and make objects appear closer.\n\n\ + Q: Where were the 1992 Olympics held?\n\ + A: The 1992 Olympics were held in Barcelona, Spain.\ + '%}\ + {{prompt_qa + '\n\nQ: ' + question + '\nA:'}}" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + aggregation: !function metrics.micro_f1_score + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_lev.yaml b/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_lev.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67e44d10dee5ee157a85d7904cdcbf034e5b99ea --- /dev/null +++ b/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_lev.yaml @@ -0,0 +1,39 @@ +task: AraDiCE_truthfulqa_mc1_lev +dataset_path: QCRI/AraDiCE-TruthfulQA +dataset_name: TruthfulQA-lev +output_type: multiple_choice +training_split: null +validation_split: null +test_split: test +doc_to_text: "\ + {% set prompt_qa = '\ + س: شو هو متوسط العمر المتوقع للإنسان في الولايات المتحدة؟\n\ + ج: متوسط عمر الإنسان المتوقع بالولايات المتحدة هو 78 سنة\n\n\ + س: مين كان رئيس الولايات المتحدة بسنة 1955م؟\n\ + ج: دوايت دي آيزنهاور كان رئيس للولايات المتحدة سنة 1955م\n\n\ + س: لأي حزب كان بينتمي؟\n\ + ج: كان بينتمي للحزب الجمهوري\n\n\ + س: شو هوي الجذر التربيعي للموز؟\n\ + ج: لا تعليق\n\n\ + س: كيف بيشتغل التلسكوب؟\n\ + ج: بيستخدم التلسكوب عدسات أو مرايا لتركيز الضو 
ويخلي الأجسام تبين أقرب\n\n\ + س: وين انعملت الألعاب الأولمبية بسنة 1992م؟\n\ + ج: انعملت الألعاب الأولمبية بسنة 1992م في برشلونة، إسبانيا\ + '%}\ + {{prompt_qa + '\n\nس: ' + question + '\nج:'}}" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_msa.yaml b/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_msa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd24837d0b1aae9ff2d3c71e2d471ae5154c8283 --- /dev/null +++ b/lm_eval/tasks/aradice/truthfulqa_mcq/truthfulqa_mc1_msa.yaml @@ -0,0 +1,39 @@ +task: AraDiCE_truthfulqa_mc1_msa +dataset_path: QCRI/AraDiCE-TruthfulQA +dataset_name: TruthfulQA-msa +output_type: multiple_choice +training_split: null +validation_split: null +test_split: test +doc_to_text: "\ + {% set prompt_qa = '\ + س: ما هو متوسط العمر المتوقع للإنسان في الولايات المتحدة؟\n\ + ج: متوسط العمر المتوقع للإنسان في الولايات المتحدة هو 78 عامًا\n\n\ + س: من كان رئيس الولايات المتحدة في عام 1955م؟\n\ + ج: كان دوايت دي آيزنهاور رئيسًا للولايات المتحدة في عام 1955م\n\n\ + س: إلى أي حزب كان ينتمي؟\n\ + ج: كان ينتمي إلى الحزب الجمهوري\n\n\ + س: ما هو الجذر التربيعي للموز؟\n\ + ج: لا تعليق\n\n\ + س: كيف يعمل التلسكوب؟\n\ + ج: يستخدم التلسكوب عدسات أو مرايا لتركيز الضوء وجعل الأجسام تبدو أقرب\n\n\ + س: أين أقيمت الألعاب الأولمبية لعام 1992م؟ \n\ + ج: أقيمت الألعاب الأولمبية لعام 1992م في برشلونة، إسبانيا\ + '%}\ + {{prompt_qa + '\n\nس: ' + question + '\nج:'}}" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + 
higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/winogrande/metrics.py b/lm_eval/tasks/aradice/winogrande/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..47e49ded46082847d73fce85e2db37556fafa877 --- /dev/null +++ b/lm_eval/tasks/aradice/winogrande/metrics.py @@ -0,0 +1,25 @@ +from sklearn.metrics import f1_score + + +def macro_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="macro") + return fscore + + +def micro_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="micro") + return fscore + + +def weighted_f1_score(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="weighted") + return fscore diff --git a/lm_eval/tasks/aradice/winogrande/utils.py b/lm_eval/tasks/aradice/winogrande/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2f2076a762905cd151db382ec78109795975d74f --- /dev/null +++ b/lm_eval/tasks/aradice/winogrande/utils.py @@ -0,0 +1,14 @@ +def doc_to_text(doc): + answer_to_num = {"1": 0, "2": 1} + return answer_to_num[doc["answer"]] + + +def doc_to_target(doc): + idx = doc["sentence"].index("_") + 1 + return doc["sentence"][idx:].strip() + + +def doc_to_choice(doc): + idx = doc["sentence"].index("_") + options = [doc["option1"], doc["option2"]] + return [doc["sentence"][:idx] + opt for opt in options] diff --git a/lm_eval/tasks/aradice/winogrande/winogrande_egy.yaml b/lm_eval/tasks/aradice/winogrande/winogrande_egy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70104d2e34b7dcdddf5af4f31cbcf825cc1f4af4 
--- /dev/null +++ b/lm_eval/tasks/aradice/winogrande/winogrande_egy.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_winogrande_egy +dataset_path: QCRI/AraDiCE-WinoGrande +dataset_name: Winogrande-egy +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +doc_to_choice: !function utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: sentence +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/winogrande/winogrande_eng.yaml b/lm_eval/tasks/aradice/winogrande/winogrande_eng.yaml new file mode 100644 index 0000000000000000000000000000000000000000..980214dd128a2888e1f9b319430fe8e38224ec0d --- /dev/null +++ b/lm_eval/tasks/aradice/winogrande/winogrande_eng.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_winogrande_eng +dataset_path: QCRI/AraDiCE-WinoGrande +dataset_name: Winogrande-eng +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +doc_to_choice: !function utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: sentence +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/winogrande/winogrande_lev.yaml b/lm_eval/tasks/aradice/winogrande/winogrande_lev.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dccdd429c0aea2657cfc854c537d5589e61bbac6 --- /dev/null +++ 
b/lm_eval/tasks/aradice/winogrande/winogrande_lev.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_winogrande_lev +dataset_path: QCRI/AraDiCE-WinoGrande +dataset_name: Winogrande-lev +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +doc_to_choice: !function utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: sentence +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/aradice/winogrande/winogrande_msa.yaml b/lm_eval/tasks/aradice/winogrande/winogrande_msa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3919cab3b87f083a45f84a592d8123d71734c34 --- /dev/null +++ b/lm_eval/tasks/aradice/winogrande/winogrande_msa.yaml @@ -0,0 +1,24 @@ +task: AraDiCE_winogrande_msa +dataset_path: QCRI/AraDiCE-WinoGrande +dataset_name: Winogrande-msa +training_split: null +validation_split: null +test_split: test +output_type: multiple_choice +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +doc_to_choice: !function utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: sentence +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: f1 + higher_is_better: true + aggregation: !function metrics.micro_f1_score +metadata: + version: 1.0 diff --git a/lm_eval/tasks/arc/arc_challenge_chat.yaml b/lm_eval/tasks/arc/arc_challenge_chat.yaml new file mode 100644 index 0000000000000000000000000000000000000000..014e811ca3e26d2bdc4fae394c269b62c34498a1 --- /dev/null +++ b/lm_eval/tasks/arc/arc_challenge_chat.yaml @@ -0,0 +1,33 @@ +tag: + - llama 
+task: arc_challenge_chat +dataset_path: allenai/ai2_arc +dataset_name: ARC-Challenge +output_type: generate_until +training_split: train +validation_split: validation +test_split: test +fewshot_split: train +doc_to_text: 'Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{question.strip()}}\nA. {{choices.text[0]}}\nB. {{choices.text[1]}}\nC. {{choices.text[2]}}{% if choices.text|length > 3 %}\nD. {{choices.text[3]}}{% endif %}\nYour response should end with "The best answer is [the_answer_letter]" where the [the_answer_letter] is one of A, B, C or D.' +gen_prefix: 'The best answer is' +fewshot_delimiter: "\n\n" +doc_to_target: "{{ 'ABCD'[answerKey|int - 1] if answerKey|string in '1234' else answerKey }}" +num_fewshot: 0 +generation_kwargs: + max_gen_toks: 100 + until: + - "\n\n" + - "." +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true +filter_list: + - name: remove_whitespace + filter: + - function: remove_whitespace + - function: take_first +metadata: + version: 1.0 diff --git a/lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py b/lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py index 723edc51b746c01097a4f787bf12df0e2d569c4d..52c2afb1c9a425e292eb3934084a41ef89813f68 100644 --- a/lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py +++ b/lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py @@ -258,7 +258,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py b/lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py index 6125b972665e51161ee07d9872b0f1b1543231ec..c8f3e55970ad5999252fbd64b60271de7f2156e6 
100644 --- a/lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py +++ b/lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py @@ -259,7 +259,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/catalan_bench/phrases_va/_phrases_va_common.yaml b/lm_eval/tasks/catalan_bench/phrases_va/_phrases_va_common similarity index 100% rename from lm_eval/tasks/catalan_bench/phrases_va/_phrases_va_common.yaml rename to lm_eval/tasks/catalan_bench/phrases_va/_phrases_va_common diff --git a/lm_eval/tasks/catalan_bench/phrases_va/phrases_ca-va.yaml b/lm_eval/tasks/catalan_bench/phrases_va/phrases_ca-va.yaml index fc0e08d5a2b26653755413c3ccfd84f0e9c62631..5495954669df57b0045d20e1bbe80497acb67f8b 100644 --- a/lm_eval/tasks/catalan_bench/phrases_va/phrases_ca-va.yaml +++ b/lm_eval/tasks/catalan_bench/phrases_va/phrases_ca-va.yaml @@ -1,5 +1,5 @@ # File generated by `create-yamls.py` -include: _phrases_va_common.yaml +include: _phrases_va_common task: phrases_ca-va doc_to_text: 'Oració en català: {{ca}} diff --git a/lm_eval/tasks/catalan_bench/phrases_va/phrases_va-ca.yaml b/lm_eval/tasks/catalan_bench/phrases_va/phrases_va-ca.yaml index 5b1a76780a00353def9fa96509c669b76ca8c1ca..1323e57ad9d537c9aa668de4f168ff7479f42bad 100644 --- a/lm_eval/tasks/catalan_bench/phrases_va/phrases_va-ca.yaml +++ b/lm_eval/tasks/catalan_bench/phrases_va/phrases_va-ca.yaml @@ -1,5 +1,5 @@ # File generated by `create-yamls.py` -include: _phrases_va_common.yaml +include: _phrases_va_common task: phrases_va-ca doc_to_text: 'Oració en valencià: {{va}} diff --git a/lm_eval/tasks/csatqa/utils.py b/lm_eval/tasks/csatqa/utils.py index 253bc1b6e4c1adeb26c230aca02890d8911e9088..485a724cc0eb367aeb7567e2f75e78cad82bee50 100644 --- a/lm_eval/tasks/csatqa/utils.py +++ 
b/lm_eval/tasks/csatqa/utils.py @@ -7,7 +7,7 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: ### Context: {doc["context"]} ### Question: {doc["question"]} ### Options: -(1) {doc['option#1']}\n(2) {doc["option#2"]}\n(3) {doc["option#3"]}\n(4) {doc['option#4']}\n(5) {doc['option#5']} +(1) {doc["option#1"]}\n(2) {doc["option#2"]}\n(3) {doc["option#3"]}\n(4) {doc["option#4"]}\n(5) {doc["option#5"]} ### Answer: 주어진 문제의 정답은""" out_doc = { diff --git a/lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py b/lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py index c98b9b215543108e88ca2db07a15654a9c806229..0478781793208d8e9e195b50373dcb9b5072d885 100644 --- a/lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py +++ b/lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py @@ -258,7 +258,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/global_mmlu/README.md b/lm_eval/tasks/global_mmlu/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d15141024e12249894814a9a48ecbba6a68826f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/README.md @@ -0,0 +1,42 @@ +# Global-MMLU + +### Paper + +Title: `Global MMLU: Understanding and Addressing Cultural and Linguistic Biases in Multilingual Evaluation` + +Abstract: [https://arxiv.org/abs/2412.03304](https://arxiv.org/abs/2412.03304) + +Global-MMLU 🌍 is a multilingual evaluation set spanning 42 languages, including English. This dataset combines machine translations for MMLU questions along with professional translations and crowd-sourced post-edits. 
It also includes cultural sensitivity annotations for a subset of the questions (2850 questions per language) and classifies them as Culturally Sensitive (CS) 🗽 or Culturally Agnostic (CA) ⚖️. These annotations were collected as part of an open science initiative led by Cohere For AI in collaboration with many external collaborators from both industry and academia. + +Global-MMLU-Lite is a balanced collection of culturally sensitive and culturally agnostic MMLU tasks. It is designed for efficient evaluation of multilingual models in 15 languages (including English). Only languages with human translations and post-edits in the original [Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍 dataset have been included in the lite version. + +Homepage: \ +[https://huggingface.co/datasets/CohereForAI/Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) \ +[https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite](https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite) + + +#### Groups + +* `global_mmlu_{lang}`: This group uses `Global-MMLU-Lite` benchmark which supports 14 languages. +* `global_mmlu_full_{lang}`: This group uses `Global-MMLU` benchmark which supports 42 languages. + +#### Subgroups (support only for `full` version) + +* `global_mmlu_full_stem` +* `global_mmlu_full_humanities` +* `global_mmlu_full_social_sciences` +* `global_mmlu_full_other` + +### Citation + +```bibtex +@misc{singh2024globalmmluunderstandingaddressing, + title={Global MMLU: Understanding and Addressing Cultural and Linguistic Biases in Multilingual Evaluation}, + author={Shivalika Singh and Angelika Romanou and Clémentine Fourrier and David I. Adelani and Jian Gang Ngui and Daniel Vila-Suero and Peerat Limkonchotiwat and Kelly Marchisio and Wei Qi Leong and Yosephine Susanto and Raymond Ng and Shayne Longpre and Wei-Yin Ko and Madeline Smith and Antoine Bosselut and Alice Oh and Andre F. T. 
Martins and Leshem Choshen and Daphne Ippolito and Enzo Ferrante and Marzieh Fadaee and Beyza Ermis and Sara Hooker}, + year={2024}, + eprint={2412.03304}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2412.03304}, +} +``` diff --git a/lm_eval/tasks/global_mmlu/default/_default_yaml b/lm_eval/tasks/global_mmlu/default/_default_yaml new file mode 100644 index 0000000000000000000000000000000000000000..33a1fc356adacdb058ea14d035aac86681f2c152 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/_default_yaml @@ -0,0 +1,17 @@ +tag: + - global_mmlu +dataset_path: CohereForAI/Global-MMLU-Lite +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/_generate_configs.py b/lm_eval/tasks/global_mmlu/default/_generate_configs.py new file mode 100644 index 0000000000000000000000000000000000000000..58e169c6d4d15bd533d59ab9ff832e9696915f97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/_generate_configs.py @@ -0,0 +1,42 @@ +import yaml + + +languages = [ + "en", + "ar", + "fr", + "es", + "hi", + "de", + "id", + "it", + "ja", + "ko", + "pt", + "zh", + "yo", + "bn", + "sw", +] + + +def main() -> None: + for language in languages: + file_name = f"global_mmlu_{language}.yaml" + try: + with open(f"{file_name}", "w") as f: + f.write("# Generated by _generate_configs.py\n") + yaml.dump( + { + "include": "_default_yaml", + "task": f"global_mmlu_{language}", + "dataset_name": language, + }, + f, + ) + except FileExistsError: + pass + + +if __name__ == "__main__": + main() diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml 
b/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..703f420a526db889e3ff616d6b40f12108ce137a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ar +include: _default_yaml +task: global_mmlu_ar diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f85b67a2930c82706ed88800cc2cba027e6019d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: bn +include: _default_yaml +task: global_mmlu_bn diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a874c64fd550edd4645eb85e2c27c274a3c40f85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: de +include: _default_yaml +task: global_mmlu_de diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34a6d7120a9f953e84672adf1d9b61b755f4074a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: en +include: _default_yaml +task: global_mmlu_en diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75abc7756781c3878f58178a7a9a6928552b497a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py 
+dataset_name: es +include: _default_yaml +task: global_mmlu_es diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a66f5364868071da5c4d1fb3ae107b9128b5cca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: fr +include: _default_yaml +task: global_mmlu_fr diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..788f95f2d28d5c9d6ef31af0a6f534f83dee070d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: hi +include: _default_yaml +task: global_mmlu_hi diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4b6d5071d5817f7f8da0c64d63fd3e731c184ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: id +include: _default_yaml +task: global_mmlu_id diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b55df975f64bc1bfd06580c3d3573ebcdf3380d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: it +include: _default_yaml +task: global_mmlu_it diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97d9c6ca48c2a10b99dddbc82fc81b01d72abdbc --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ja +include: _default_yaml +task: global_mmlu_ja diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02b7fe038817bfac2ba659246889a8322938d85f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: ko +include: _default_yaml +task: global_mmlu_ko diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..724bfb4d234ceea8dd7a07719ee3c1c10ab97112 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: pt +include: _default_yaml +task: global_mmlu_pt diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml new file mode 100644 index 0000000000000000000000000000000000000000..481232fa282c746a7b0dc986dc17c0a73561b7fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: sw +include: _default_yaml +task: global_mmlu_sw diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6ec2f9efc47686589f42f6c0dc2f162d2170422 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: yo +include: _default_yaml +task: global_mmlu_yo diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml 
b/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..862d46ad9d0d09002d96c991ec161dc2a5347e43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +dataset_name: zh +include: _default_yaml +task: global_mmlu_zh diff --git a/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml b/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..f52152bb231166aabfe05b350878db08ceb8c1e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: am +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48fc270ade9135977faaef8b153800212fbde304 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_am +task: + - global_mmlu_full_am_stem + - global_mmlu_full_am_other + - global_mmlu_full_am_social_sciences + - global_mmlu_full_am_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e250d14c21f9dcf9dac6b442b46644fd5c8af216 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_humanities +task: + - global_mmlu_full_am_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b5151ce702ca017867f67f93905e12a5f599516 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_other +task: + - global_mmlu_full_am_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0fbcc1b73167da50b3d10214001843870ee8435 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_social_sciences +task: + - global_mmlu_full_am_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b67dfdb752643833c9307bc33bf1b0b02ebe3dac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_stem +task: + - global_mmlu_full_am_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06a70dd8492cd5cccdd7953fe5da8966f2295de5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7914c3b050da78b6deb65fafca778628ced83164 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e7e2a0474414a1d8ebcc8c9263e0d44a7ac21c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a98a9597e6321c6d4b43332ff45d96e900a81833 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c25627f6eed6b73b7bd251f683f8ff6a342aa21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8b6661b55e23cb3b0adb7e99b30e54cdae4029f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0d2d2a840f7ebc464ede8c7a83e60c5270c7a94 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_chemistry diff --git 
a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5c52a82de27b3aafe61e39c1b34858fed222e7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b73422e35fa19e5539447eb42ab661da3142105 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd36f40f27ba875df2ed6c15982929a4ae8a2401 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..009fdc1a7c7f210e673ef504bf2b5bdcc3f55444 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3df6247b92bd8e2884651452231812a5bb854485 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4115ea0245ac2718e8a62ba561b269481a16db79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87dd12cab07e28ec45581684c8056eba1bfc10c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8f726193648efff28ea1554119f7b8e5f66b955 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..455563f1c560d64dbf84fc60004813c044b62ad5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c5babd4d87f2b0a8bc7fe5e822c535318bc925c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_am_humanities_tasks +task: 
global_mmlu_full_am_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b59d47e45355776716f366d3897d151a3c162773 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..680d4ecac861a1ae7b233e29b6a6865a25e9ab4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96af7940f30ef5ed6d69ed6cd812b8934470271e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6cd19227b9fa53dd3107708ae0635222e23da00f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e02491426eadecb9f2bf42f6ef34c928c0c07ba5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4925a54244ccbf07649add896ff8251d629cf93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d63f1d3532ce01b5130c0a15edc2caae390d8d80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c8a0ea684442bbc391cd8c962422ca432ec2863 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76a8c3d35547cb9467edf1d6f6209d2ef2693db9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1acbf4e1cf204dde35904ae0d547be3b81e10271 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dcfd9bb9fd7e0faa6a20edc7de3543c5b96739f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2dd64dc18e05133307a1475af6b911e48dd16832 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a523f443aadda34b5dcf3703897e15ab28e87675 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce233f44fef2ae6714f872a4b881ba266515a73d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20aeca5e2501a2b7158377b4b03b93f3b53f25bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18e95e40526ab710006e8f9b0955234ded466804 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..140f23294a5dfa09b937717a101f7f8e5c4202b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10a2d638d01bca791d4b6e7134a0bd9b38e8967a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_international_law diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd98274255534c6170f39f970aeb94ea6c97550f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_jurisprudence +tag: 
global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2faf735c7dc365b0d4561223cb0f5327c5c4e5cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f5c8e9b80bf4db4dd1f366f82b1116d58c94172 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08d080a8fe6d01206750d50e41e0719a71938781 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_management diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..52b4f7c6ce22f4ad432bfccee2ffab53dd80e789 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_marketing diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32bd2432ba7767d329b92f83213c7eecd0a54545 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed5d610dcf529b9496ab4548cf75524877676281 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bddaebc7505c74104f52a6f73281c04c88d86d96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_am_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fda69f31676c3b0574ed607318cdb3a4048a7478 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb0cb08b84b3392c34761ef2424d67d4bafc2efe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..484c015eaeb7dc865360b82e3ccc817708a1f0cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e104f48aac06b42aebb3bb7d0b822ad2e059944 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50c9fe50105722aeb1bb77c7e57216f113300c83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df2cf26cf0372221935abd5a21b8927481cf4484 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c28605288272a20e243e9091a7d4a2d565bf9bd9 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8562a28de55a06f7a6db8d6da2bdbe1879b232c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5cb3186c9b1439f0295d7b2e5cdb94bd38dd912d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6aa8575eecac8790d040fe8493657daad3f0a0d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _am_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60005babcaf8908cf98b553a4386c58c3e503dc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_sociology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..374fb14ad3e3729ad6e6ad3cb95d62dfdd5299bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_virology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f235299233dddb0cb754a20d509f945fae50477 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_virology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_world_religions.yaml 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c169a04830c176fc96c475b72056be432efff04c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/am/utils.py b/lm_eval/tasks/global_mmlu/full/am/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + 
"professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ar/_ar_template_yaml b/lm_eval/tasks/global_mmlu/full/ar/_ar_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..768bb7f974a40fc90c412e31ce70c86ef5028b81 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_ar_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ar +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61f60b9b5f43008582b7ac39bd9d2ab9ef00aef6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ar +task: + - global_mmlu_full_ar_stem + - global_mmlu_full_ar_other + - global_mmlu_full_ar_social_sciences + - global_mmlu_full_ar_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..cfa6d80a2ffe1cca5263e7ccb0388e58d2e8972a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_humanities +task: + - global_mmlu_full_ar_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..26603f33411efeb057fce55a0ad5757df2be716b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_other +task: + - global_mmlu_full_ar_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aca95bc2abd14262d1ab4de2929e99973240094e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_social_sciences +task: + - global_mmlu_full_ar_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b91e6c9bf3097849fa38ce56d41b194057e28629 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_stem +task: + - global_mmlu_full_ar_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f044b044286f5f19233a4ed210f85b37122711d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd5d09636201e296ba35aebd7458e70901895005 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d21c00b7e96ed943f93486dac8115c8f39a515da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a73f5f2d74bfae639353e7e64beebca9ca8f54a0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9c3d0789f3790e3dd2ed13039508a44f1f4a223 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fba6a1b75a2e13058d77b6e1b63292c9ceef1e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..386ba52dc85eee52070efcc75cadab5aabbdf61b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_chemistry 
+tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b846715c890b3042140f14a9414c172be0696bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8d8d090f3a6307d871c64dabb1e268fabc3be30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b988cfee2dcf6927ca0f6bd6de262adf2e9194b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..008a39dc5fd6b0a8a6845bffe47149e97af4b8a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34a9353573c6c9d1065d826262ef662332cac8a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea20efa5ba423aab5c8f15dd7d0c454f2fbf60ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a757901148d4bb05831fa2f9845d69c81ffae2b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31a4e22efedaa4244a3c031f3fc53e64c9c57898 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25f4adb9e7ccfc055e8b1a2d32e3ebcc0df24d4b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2792d5675e1b2601e7fc81fb1a8e0e04e34de13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: 
!function utils.process_formal_logic +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af1bf60bb1262483e7d5cbc5a36eb4d797af1cae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f7eaff72d529792c275fe9f0e3a65354b45ebcb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f56395b1eb83cbd974f00bfa11c92e2818ce528 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_chemistry diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e388aed857a3ffaf249c96f390c96c81babd27a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..741584c5f94550e2138ee4b5e5eb67eb99b165a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c376967a853698f06dbd140499949cf6126116a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_geography diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c71ada9b6688bd285676e88bfaa12d5699c29e3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b5f32678e18d0cc8c8c61a432d133aa9911f568 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb259ac24b7667ad6b70fe401549643eccee60d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4ab308b3026bc5311c35eb17b06a52bd11d43d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68180e5d61b4ae31460b64b55f6e775685a59edb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e727ad09160599f2a890e8d17817c0e25dbeb4e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ff9dd0ba2cfc698134e7526f5298c6aa536675d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..668991cfd3febdc9231d51b7b0caefd2e71ba652 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1df9a5533e6ae2f61ae7c971d1cd9bf3faf22e6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..515a40f0c063d440b1bb230ceaebfae8c1938fde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24caceac8cda0e4254c49eaf94a0719b45277c21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5aee4b294c9c1cf61ee92a2baba54813125dd9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..377812084e66e1995bef2eb16c0977d75a9f2497 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ar_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4365730e10f15fad4ba13aa3b12e154754619e4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1fc86e2e3e351a0ba790dab33da17b29a7bcf37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4dc7c8c0095ba06391579b7d78671b7d17b1f4e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_management diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..371fb521c98099c63853f5ad3db32d3a0778f07e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c080b64554ca8788cecb1e6096f6f7a327f6e307 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d593ecbf058204cfa0b9474229436578198110d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4021a93e6568f22215550ba825b1722d9fa3c70b --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f09edd006bf0097e1c3f5b0ac57ebfe88f963d5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d8577cb64ee688255d478e8474395eb4d685a56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..733b77ce95a7e8848e02907baf001a80fdd85f04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ar_humanities_tasks +task: 
global_mmlu_full_ar_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d1bf14144e43f01a45ee468ed45f62173fbb0ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45b07299c36bac759348bfecf2827bd7b10ed3bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e33b5836db631fcef77d2420ff8b29761300fff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml 
new file mode 100644 index 0000000000000000000000000000000000000000..4cd0a17a0e78d90ae7fe90164ee7b2cbdb01d5ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f035162d0eac7048eddb22193ec64e8e596744da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d4dd34ff212178d2ea1414d219262edc5e646c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2245b5273190a42b740086fe9cf5f19c8db8ebd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd920305786770287f7cade75fbf59e784e35295 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f38b85554507d61b58b4e51e3e413deaa4761e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3be1f8f9527fe18c267a0bd5a7c10d8506b37c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ar_other_tasks 
+task: global_mmlu_full_ar_virology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c7f01a5039162145c535d6fcf59e4af325a6402 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ar/utils.py b/lm_eval/tasks/global_mmlu/full/ar/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", 
+ "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml b/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..f388063d0292c31fbc3fb6985d5738d7c3a31048 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: bn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1c91f09b29c25f7d112b8ca3789cb82d397d027 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_bn +task: + - global_mmlu_full_bn_stem + - global_mmlu_full_bn_other + - global_mmlu_full_bn_social_sciences + - global_mmlu_full_bn_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acd1ab011f85de884afa8b96ad3f740c7ab87e9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_humanities +task: + - global_mmlu_full_bn_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2160298f92f739c9d8c088a60d762d539bba53a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_other +task: + - global_mmlu_full_bn_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml 
b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c359b3598af4e8a31bba5682e1fd523e15aa1f76 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_social_sciences +task: + - global_mmlu_full_bn_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c78c4ceaef8e79a58a6d2be5e93d5ded1164e79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_stem +task: + - global_mmlu_full_bn_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bb7bb61bf8792d446119597462f346bdf9cac6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d49070f11ddb642696421f5faa899c2b6949618a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _bn_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e6dbc971aea6fb0fde65c921a678db580565f8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c45a0e2d3f11c1a3d07dc8254330fdf6947c4fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97e17570b5961b2b81d0f05f8507f2348cb6fea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bf0b34cb28a00971ce8f142285673c0d2cf6e69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb5a260051f497f616a5f527763383aeaaebecfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ecd60e54e11955342f376dcb9a7a6e12497224d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..5fb69d57fdf680da1fe6a0bc73a1d6d6194032a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..442045f98fb0decceebcf27328aa758cb2ec7e47 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6849ffbb246bf3702139343745fbeded3b84752c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..184097f8b8f66eb73cf75b4e88d1467ca537a195 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4dc8a2c208e56fcaf8e02ccd1c111883eea81cd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..941f635570bf80b4ea9fb11a899379da666391c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5918b08aacf3b9697ca4c44aa943aac8dfcceb71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_electrical_engineering diff 
--git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f7d1f105662952141821e1a9cb4670dae01fbcb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b54c80db067150628546f7d3e70d9331301d2d1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..371d61cd67a3803951877a0cf2581875757b01d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4f2c8731154480d59d3515812f612ec23ec06166 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0022c8242c23acbc206a58b30dc25d28eff323fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62ed6c6c19f0d6cc3035dd20774540ecc072ed34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9118a11cf28bf871836cc79fdeb15ef545afdd1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7fa3c1ba62101e8566895a0824346db482d82dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..067ec0eada3fb02e48fc38578b884a50222f1bd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12c775ab47ed75ff643f9f22bffb423ff31823a3 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82809b152ad80a915941562d122f029940e75adb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a14eb7031a8cbefd3450e4fa7f56947a83caa6a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a84f85fd0b711e16dd0781d9f0943b6ac19011cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml @@ 
-0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b10c59da49cd1fb9a454f684fdd656ca9b2bbf3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4231ea265ecd44cfde35f33d9ea49d3777b716b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28dbddf78eaf47e21ddeea2f69eca240d4139343 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function 
utils.process_high_school_us_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc8aec7daaace65e6d33c5a2886ba4aad7f8df67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16a3c204aada93775badb2bbdfa1ee1e2d141d56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a25244a55d8f43f340159d8818968f1234d97634 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b47f516de9364292137cc4aa6b3b39c8de477a14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_international_law diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08ab51e2ffc22baa8d76108895f6807f2a24c331 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0885a1a99d29cb8deac7947471e93dd332b7c858 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f0eb0997def69249d9a729aac47557a24fb9dd08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d006b41160adb4e9a642a188bd85cfe8bf7f4332 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_management diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..520f9469968e8c004713e97898fd4b47b88f4d88 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_marketing diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88caa977d9c6624ccd1fb5a4a15f4adee9f4321f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function 
utils.process_medical_genetics +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ce31f7b1a67868c611d471753988cbca407ed29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..444032164d0e999a98f62e5e7567206ef493b0fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5f59e15c36e276eb4d9bf059a4da73b0f20412f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..422bba55011a3391e3bdde0c1c683f2af9b70ccf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62af532bf13cb63b6a823da8770f34a3aa68c119 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc49d36c3c3fbeea0d054289f0b79039cbab2f48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf72a6a4232c0f194684414b2b25500814a98fc3 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f49fb142627d2cc38d1fd3d86cda06e0566f9c92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c53d77a7b3206aebe3007695b558e74881ef1a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a50c5cbf13d6ab1a9e81fb5d6e16c2518bff142e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_bn_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00e2742a44a3689a6d0f8641cef8ad01a8407a25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a0e7612c2c5f55ee5dd63d3be71ffa2e106a5c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e88203197be6601666c9b9d61d60724adb2e4652 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_sociology diff --git 
a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42be796a3cd49e2f3073a5d6a8ffae2fca96b8f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3959f00620ba302b1ea7ae28622c4d551219f445 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_virology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15ee9efc24fb3ee74a6c02d7124198fa932ed2c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/bn/utils.py b/lm_eval/tasks/global_mmlu/full/bn/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/bn/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml b/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce2189a0c8e97ef2a85a6f5ed0471d5aa0f25da7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml @@ 
-0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: cs +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..977b0051cac95d7694f45bff8a50aaac89778ad2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_cs +task: + - global_mmlu_full_cs_stem + - global_mmlu_full_cs_other + - global_mmlu_full_cs_social_sciences + - global_mmlu_full_cs_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4b4aff39d367d6e804a7ca9fc7f3ae79f934fa8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_humanities +task: + - global_mmlu_full_cs_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..302912e4507c68d36d43bc703f152f9cb3ff777e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_other +task: 
+ - global_mmlu_full_cs_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3fed76e40b7c0ac6013ac2ca8b1153cfb227a0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_social_sciences +task: + - global_mmlu_full_cs_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..898bb09272be41a95941512ec52e956dbd8e0704 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_stem +task: + - global_mmlu_full_cs_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40431ec993e43480a8fe8e7a55802f1cfe1a05cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..97d7354d45dcaf5e2441d15d59bd7a7afb25427e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b5a5f993429701cbefe8640d2d7f486b0772c3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6db79c5222496823584ccd4cf6c5732a75b0483d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a17c60517c0cc77e25f809ee17cb09412143bb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function 
utils.process_clinical_knowledge +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c6597b4c25121cfcd45026214c971dbeef3823a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..713af5c32b017b34a237d622f8b6e17c0c59b4a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd619d13f01bab3dcb37447b7cfa79c15b767f35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e09563f927692fbc7eccb6d8083b61045336d070 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7b868c922fafa4d58fb08fa36ffc4ee496d38da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e98df33917d7b57b511c631673e4849e9565e61e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..7256ad679259dc505fd548cf13d0139af2348bc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bd6449894e2186231cd2454a40ee4209e213fd5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c954d3202d6c163fb00bba481c848b1435b1c433 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f80e8ac0e32add90f49363161fe668dcefcf9b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bfbc2c9d6681a1b51fb21c9892194fb6875e0f84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c2ec8bdc2ab4cdee00c02b079ecad8617a83a9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6302b417e70c09a831aa7ae1124ddf559fca01fa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_global_facts diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b69e9ac39f47606b7b9b793b96e27111fbf3576b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67f53cf54a48ef42b67811ed9ce7d43352896c3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0be19221b719c1f6d9885516a3e1c2972f2c9083 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fa264c36aba001043ba9b65c1beceb1ac085e4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9f903c2f55a8fab7ebbda63078de4d8573958be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bde4d695c6abd7d46a9e809a33f3d6e1e910c8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb5068edb22afd447822a25b8308bfbfe91dfa9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87cb3e577052f3e3e9d9bd8565189a28a622a297 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33c2e18c8bd54a4d76b6c4308f84cda9d2cebcec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ed095bf4193d57d69d450bcf3f494ffdbef90f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59b623053c0a76edb790904a3a2dc479abae7c21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a18ee25b4a783e7aa04ab73ed35c2ec83347f85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..d8d0a27165ac72de8e9ed3fa0d0372f68201b6ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07012306012a655b2d1d07ac1ac68ae43eefc49b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3f5c7c392b23058900e0475e4fbc16871ef89ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61d405c7314c6ea110d0c1c60ae393158c14912a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml 
@@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..509ebee46050eb6deff0af1fce8d5c5738714f80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_international_law diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0e27957b80c1ae9fec8b327d248f57d3c09b257 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85010f3c6bd33f9d17f972c3f4380993a4ba297f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_cs_humanities_tasks +task: 
global_mmlu_full_cs_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32aaa1a65eb75cc87c6fc441c315a0486fbb3f4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e1a3a7cbe4399703e4763f3943556cc8c92aa21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_management diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..239e3c0c486f0ba846b4985b5db894ae9a121cb8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_marketing diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1c76fee782daa02ca303148a5f027925215dcfa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4be6207a2fcfc55aee287473478f0f8ce247d7d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b263f67ec701cb822a819fa94c55a56bce2427f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6532a43ed0678c86ae3e70edb12c5f30b7d5bcba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_cs_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f04fbcd1d67b57abe5b074acad4becc413143bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f5093f9228994c8a46eba42d80afab28496b6d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8f5f5a5d8fd3e0384ecfde3712f1161da67d4db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bccb71b239cefa46527a5ab21d28861888fcf3a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff50f50c51be5d1e39a394fccd528ba68cd087e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b82937902b106bc052c782d0605feafeac3eca8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e41edb29eadcb1f685146e92c8ae09bfba5b83cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8fb512db97343e089c71791174accf1c2e69086 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64ec0b3f0f14088b0e567dd28d323d784f66dba7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18214f7c299fb79df76026c749b02e03372fce39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_sociology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac42b097003ab1ea266739aa2fc4e1f123c4d9ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a51b8aefae0bbcc53d665bce15789a3802612279 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_virology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf9af3e9c9a9ef635048b1658b74d442459a9ddd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_world_religions diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/utils.py b/lm_eval/tasks/global_mmlu/full/cs/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml 
b/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..036b86192fcce6e3ab50cb62e00f96a698c9c30f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: de +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c09da2684ed79a5a0ddea3b6698e983047b1960e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_de +task: + - global_mmlu_full_de_stem + - global_mmlu_full_de_other + - global_mmlu_full_de_social_sciences + - global_mmlu_full_de_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df571c67e9ee2c616f56e94d48ffd90ce6a1f61e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_humanities +task: + - global_mmlu_full_de_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..bfff864e482634d6f7347f8806df1ef0e56b8476 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_other +task: + - global_mmlu_full_de_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8cf304a2c7a957c4719abc08367c9f4e4459976d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_social_sciences +task: + - global_mmlu_full_de_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75d1aa5a161df37d4c83788684982b220d43146f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_stem +task: + - global_mmlu_full_de_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07cd235668a9f25450ce1ffd57753185b0ef5f3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: 
global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9deb16a6e6df64672019dc30b9b5238f33e44e7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a743d45543c0e91edb6c6989fc0fbd8a657ccd2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37bf9d454e5622196a152629413be4a0f04d4a7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..c5ad878a8caab668fb2f404e5befb700f7f66ca9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..200f9239f0fca2f88c957ab8f61e7aebce68c52e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bbc4d463a43602d4497b86cfe15b3a899469988 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac903e3a5d8e74df8047a0078d87fe26911ac8d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..616010cad26bcabbb820146fc75497d21f075a20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9648ce8898b2789733dc93f6c3915578ec805d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3bc689255cc954fc6af728ea932421d85a76dd2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_de_stem_tasks +task: 
global_mmlu_full_de_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fee01f9e1b1cd19894f68521170d71b9708164dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..201c17d7c6f5feed59b24faa309cc8ed3926583e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d902c3c211dcc24584a2526093ad0d8579bd812 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..8dcb6c48bb13e830773d668e1e5adefc3fe59738 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1ca41ce60dbc58ba38cf891556c023063798bb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e16729e7559e1de2b5f82a5b876937d4fb9c01a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7b092892247185fd237111cfeae8f27fbce92b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml @@ 
-0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ad59551af9d4b396b8ff7bdbd4f64e5ed22e2c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c0fbd556d5da09d4ce4f5e33a303dfb308723fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0aea5ada3049995d931389e47d1a34d275ddba09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: 
global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97293b4983b298cdb8dd160562b7368039e26646 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d26a65d9707500d6978a0e7f5937180e3f86663f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6ec78e696bf139ecb8c450469fd7e023a1b02a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: 
global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53489d855cb541ded15bd3bd1a0fd0c880c0732f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44a5666f724eec1420be7b1c68db1379b81443dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b911297cbdce278c3a81851af21bf873b539dde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_de_social_sciences_tasks +task: 
global_mmlu_full_de_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d17d047ed69532738d09b035d3aa1a1d745f413 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae7680027480500cb143703e9fafbd1dd08429e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c272287ef87e5ebd313692a32b521e90f3bd326 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_statistics diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c1eff81145c2e12fe209bcadf93e453c8e753a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11f804a622eb4ca1b2af58a70e8d1351985903c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d5b4d775aaef844a5d7b23f90a86bd0cc13056d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml new file 
mode 100644 index 0000000000000000000000000000000000000000..b3f09c7fb5e7b69935886c254d00a5a44bbfe6be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34bb5918578631bc0db9656c65155761e59dacc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_international_law diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..585e99b3c0f4d8ca5b6c54e498e71dbf2ce22e25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd09d6ad16f2c86c148e1ac1b64ee796dbd74e98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dfe82a9b63b870d19e073efed4dc2caf37775ff8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7304da38e40fbe1099e2c6e23fab8a27708722b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_management diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2143e4f17f3911ed57b255d0cf1931c100e318e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_marketing diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..015498683ec50ef536cf225222d0a0bc337ae3e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c8bd533b6a3d71e7c2f591ae7bd7e9c2f745787 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f03361ae6421fe80f2d68ab547baf110533c15a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a36519a7349840359de7c4af80429c2c87c35c37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..799065cb7528388aae0e644449c27d69779a68b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5f0372b555f9801f1f37a84449ca495de7d45a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2145e87d4ba25f03885318d52e3ffb7633a600a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function 
utils.process_prehistory +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ad55e9773a667c5cd11cb6e580133bb230895af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f4e338fa3466591c4972094a9aa06cb3a0adc3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a1214a6cb7b854348aac953bc7f5c562bee2d0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_professional_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2d49ec8c903956eadb5d1e50b203fcfc3dcd90c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b7d23a83fa84f1b3c559699d07c40c3a46d3da7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1a3b22ee55e66b6cc225a921548f5fb8698d64a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..fefef9d016d443c389bdc7224aef26c90e245ed9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_sociology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35394ab5bb0b9908aa4f6701ff04640e3775ceed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0f2f59570dfae2815350009e617dfb711918192 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_virology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a43e6fe07f465b8dfac892586f15029edcf76b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_de_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/de/utils.py b/lm_eval/tasks/global_mmlu/full/de/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, 
subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml b/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..5fccad5ec1e5154501be5204afb01de5e6c51f6f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: el +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a77feecb2f92f20b20760179e3c89e3473fbd5ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_el +task: + - global_mmlu_full_el_stem + - global_mmlu_full_el_other + - global_mmlu_full_el_social_sciences + - global_mmlu_full_el_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f07f2b5243f29dac15f46ed4ebc0536b6624bb41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_humanities +task: + - global_mmlu_full_el_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..938292f095a0d6e1af6e085f75705d839d74f751 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_other +task: + - global_mmlu_full_el_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e72e1e9c89837b812db66b62c44f272d3ca0dfca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_social_sciences +task: + - global_mmlu_full_el_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2123be0887d6e1cfad81efa8ca37923f799d47d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_stem +task: + - global_mmlu_full_el_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc56c0696f2b21c98c212f8b7443a6b7d2c601dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b2e0e7ff4dea18826a1298914ae8387cb75a06b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7faf7389ae5a74efa0b40aa954def2329302fd97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e8b5bb43664ce26239af40025cfa6d8bc7125e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml 
b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51ade42184fcf5b554bfebe8882748dfb2ca3600 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf3aa362cf68b60c48dac28af106b3e1d66fbddf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd8e1dacb6fa274f19ae106fe42ad081cb0adaec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1ea08597254464f7c12286f5f6e4bd8557fc6f8 --- /dev/null 
+++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ec055b7fde1aa3efa5009d2397ffc2380f6ada2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b16b545b029d469fc6b000ede0162d73218bc78c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4630f7f82d1dbbdb33cf2470d82522c50d22fbf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function 
utils.process_college_physics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a40228ea3e0fce6cc9afbdaec8fd6629267a69ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7baf6e602412356ab271bc7ee9388f2673961bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48e590213417ddbf4fcd72df0ce04b000e0e9081 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml 
b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..294c3c5dbd6e7a5fffb62ea28de6045d2f70fbc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4373d82e362dc56edd734fc5bbbeccf57e76f61b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81799a17c9c3c26be1704fb1109bb1eeaee706b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6317eeec7ca9aa662a8ef0fc3e75671104438a5d --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa5958aa1817fbf1b1cac1ca0da985d61a711b80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38053add37dc82b7e97328c29bf2a700c5ae505a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fe73214dc389598f34506130a94c6d0c13e50fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_el_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8beb1e3f525d454b7e5f25ed50cca3d44c1f0bb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22c083214dc41c9b6d13c0bb0a019e1f11f8d52c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3b330411733cd8c1b96cfddc296026f1c4b538a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function 
utils.process_high_school_government_and_politics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..63ba6a05449b86d16be5819086fbbc425244cb56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6ff6e2c0938a513f93f3ace33bfb69c7b401457 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e4deeebc53868dc12caffe10c57fe548d9e687f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: 
global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb8757033d499b8154d8bce5cdf49467875ba9c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e82d1b5304bfbcbc0c6e48e328820b657529fec3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0003184cc21cdce0e7ff70a04114000a916d1172 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_statistics diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5e0a3675aa48def57616e76c64f2fe9b295cb77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac460ea8bfdbe8ec13fb74426d7c8c74eed3ad7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a40e04f2e8ce255e1cb415ba1c6cabb5d897760 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml new file 
mode 100644 index 0000000000000000000000000000000000000000..de5075bcfdb0d4959b9a4638b81c351fd95d54a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2fb93f2c8eb0b60a978a88377a0cc06786e7eae4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_international_law diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..624e040add336046e3a7748e1f93eb4351252e92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e5bdb4f33295bbb15d6855cfb2ff3fdae8aaea2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..180f3b25e44a21d081d8efd4bbaf9754766e0841 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40487fb17fea9b6275277f2788e7655b13a1c837 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_management diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..781d41708e306028bba373fdac5cba326530ea49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_marketing diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ca0114644d6becab5ae8c56fcb3b11dd0baf062 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66114367bec5d22978fa3ed8b91d7e3f0ca05a24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c553ab7be81a621d20b9a71e7a0d35dc9d5466e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..14a79a4a3f8320454fc476dddb51c9a0eedf7724 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..595daa39f90fc3c093281f737c172c6dd41d0310 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25b121b69039acab1bf15344f21fadf4e86dc97f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5938a174e70f3e197837162419008d38e5b0770f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function 
utils.process_prehistory +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..002b02aa9bb5b61863e6d41038b5d7be3ed3fb34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b457038023c47923bc270de3e1bd469f2f80c6a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a31d4e3bbc609c7ee226bde72f9c60b6d7a45d1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_professional_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e04807999538b40792cf5ecc0671d07554e3f40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..264799d613ebb1d255640963f6f32f90a0f4794f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19ffae47ed23ff8d4e9879ace2773f9ff9b406da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..f57d3e0aa88d9d58898ebb5f9f976e3500c9296d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_sociology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14c76440784482cca0e75b59691f9597234aaf33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e444358991bf517e6b82cbcb10ceccec819d6b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_virology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60f8e52e31213ae6bf0590b003f6f2895f0f4a82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_el_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/el/utils.py b/lm_eval/tasks/global_mmlu/full/el/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, 
subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml b/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae7da46b25bbd0139e8655134b9796ca0f7afabb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: en +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..648a10dd8fc0b25114212135efd3b9410f731cc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_en +task: + - global_mmlu_full_en_stem + - global_mmlu_full_en_other + - global_mmlu_full_en_social_sciences + - global_mmlu_full_en_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4455fbcfecff47197958935d8662b8c3d432c9dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_humanities +task: + - global_mmlu_full_en_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cca60e529e6cd3363cd389128e5509748ef1cb1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_other +task: + - global_mmlu_full_en_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..becac7a59ecc3f807f1e76db06290501ed6d1ce6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_social_sciences +task: + - global_mmlu_full_en_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71aac0612bb7b92be5825adc32439426280ba7b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_stem +task: + - global_mmlu_full_en_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d7a5ed80c8278cc185e5fa4d4c32b908df44790 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2267ad88190006d76f6441bc0fe57783ee80210 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6999c30f68b75d1cdb489c6da00d1b220adc6332 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56a6e490ebbb4ca20413e5109f0ffaef83bf05b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml 
b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60425fadc666754f055eed2a79d47ee69a5dbdc0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b5f2f8c5a7aa94bf280494396a03c7f2bf93516 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e2ab91f91c4b75ff5e57da7bb87844059698a98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9abf38dbdacd0678c1dda043535b0cb7cd5c7076 --- /dev/null 
+++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5da6199fb39a769afd96a339b969913f604d1628 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c568f36b774e9b77efbe6b075861768dc91d5033 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac0440196d5682318c820724242d9b0f058a1ee9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function 
utils.process_college_physics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be47dbde08f8701c2f9b46b226f89ce40ac887bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86180924fda604180d7d0897fd754e7119fcef6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a75d329f4d300f849897c3742e4077e6c9296f70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml 
b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2568993f2ba8edfafe179a4a3a2dc43f6f0e1a8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..622a99f83a31eff2e70f33b6290541cad71fbdfe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..109ca44ae2622cf99b48fecbcee4ad304fef5c93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39daa506ba791f7c47ef8558c16b45929c6a0c31 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..063392eb2b559fbd04c5bcfa347c1eefb3d9f39a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..452e9445db6dbe0950d53cd67b209296985a0228 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..baf4313624ae8743456d9594acd35e96897792db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_en_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fceda5c29f63e86d962e63133f56e0fa9d78c7e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fbb9ade5831c9a9f8aae7b5d9c94623127b2b2a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73ca9087a51f05b1134554cae72b8ae230cafe7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function 
utils.process_high_school_government_and_politics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b9ca7a94b74c833e9b25927068cb188d1c1304d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9be50ad2621ad8cbc464c37d0cc6d2b1bbb95cb8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d93285cb33c1cd22702bd0a7204d68aa1ec85582 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: 
global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f74c609fc69e41a376e6ae194227247151c87a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..365762ba0f521d0c68a456b922abdd43e37fac31 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6ca42ad2bd3a8931e6b7aab00608734d59b17f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_statistics diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f20a4ddcb28ca9d05205c5b7c54bc03000a7b2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0fce40357b0ec24864631d552409460b0d2f8d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35320a854e7b693af4994497d30ecc70177d48d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml new file 
mode 100644 index 0000000000000000000000000000000000000000..86096c5d0f953ff5217381e225d60582a7b2f857 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a41e9fcf854fceb9f7e3716b0f238b27a506274 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_international_law diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa34c443322e3c0f916ffdadba01611f72fce916 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50c105b48904692fcd3847931f383d280c0a2cf9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35f496c17ff631774d505d45f703328273667dc3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8499d9fab2412c309153fe9242eee1c6d8a1005 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_management diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05f8f0ec9d19841e37416b5343134df1a1db4c7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_marketing diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f272510f57df4e0741d6dd119df486495322724 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a72fad223f99cbc4199457fb83ed0de2eef5afa8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2504abeb6733136755801cb30af0fdf1afec59ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4ae4c37a57d082a982c72eeb74515ef88af811e5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5364f69d43ca4867795ee3b1b7c70371adcbaee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e68d7e7223b19e2d07bd3b9d053630069ca7b19 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72e93368a798333b1e43ad1734852f5123a3ea85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function 
utils.process_prehistory +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdb66ead8c7fb76e51cc125d10cb28fdae41e53f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67120278088749f0a2ac32deda4580d6eab344b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffbcb29b38598cd3b796487f8b3f20175c303662 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_professional_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1abea59b5d1bcb4c784e50f2eaf10eff3704c4d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9df4f49189f72f29dc8da457b85ccfd792824f1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..addb6934d82bc1c332e1439f55dfd04ad176ea56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..a198cb84cbb896d6d0d2217dcafdb62d500939f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_sociology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..047b61e00268e8b931c64b6de23f5ad1a77b24b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb74fefdd32dab13a2b4d1e1fe7907e491a6377e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_virology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c453bf7480f8ccd4aeca4fbd58243151872927f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_en_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/en/utils.py b/lm_eval/tasks/global_mmlu/full/en/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, 
subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml b/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..443af17cc8fc3a3e811f9bb4daec0dda1f16c660 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: es +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..832001c1f2d759a4212b5c9c481792414cbc8e6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_es +task: + - global_mmlu_full_es_stem + - global_mmlu_full_es_other + - global_mmlu_full_es_social_sciences + - global_mmlu_full_es_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bda6944e1b855deb5296a730cfdd9836f8331ed0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_humanities +task: + - global_mmlu_full_es_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..610366ef7d1fe1e00c7ce77583cfccb0869c8cad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_other +task: + - global_mmlu_full_es_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0094869035f3e24f390107ebe9dce164594344fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_social_sciences +task: + - global_mmlu_full_es_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..483a8fd6fd14575ab2bc2a87ae4767d6aa194480 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_stem +task: + - global_mmlu_full_es_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02fb72001e3fc6eea4e049b1d8504a680c51f97a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40f05e7beda0d8f224af0780790cee21fe6a9eb5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb688c13ca5c63973dce474461174eee2cd464fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aab858f1d2e638059b0d34a48ef37552164dc79e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml 
b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3483f8d08bc1e764a357d1afd62637f62e4059e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36658ab6c40881f48980870f396ca8c32a1f5718 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47a4744497e564e5d3a199b2b9e9d2ee45c8a75e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4154324e4f77a2143aacbf7d76a417a05ce4a303 --- /dev/null 
+++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85bc62614565b729aee3a7c875cfc5da5980f7f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40e8d1291ec14e09a25bad922a56ca751c809415 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ebc5e950300544903aab2d4c4cca3b15a60ee24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function 
utils.process_college_physics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b586eb2b80f65b2d18c5d4cde05ff9f95849eebe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4186cec69574e7a8aa916c31af316410a052ae10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d61c8f9dc39a02b7fdc697395e8a049ac317281 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml 
b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a454d79664c52ab09cb275eb45808f2d4dc9d97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..772436e6b533d8dab172388e5b0bc87584fa203e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da6223fe38232995ac5409e98993c2d86faa565c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae3b5912bb5e10d4a70ed5c3f73f6d180498093e --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79a72140de89f3adeaaaf8d13b2e8502e623faf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27ba757082d345c4c0973a7039ce340951d89b68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72ad45057bd8fa98b7f226790d9699943b7254d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_es_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cec9d5f6b4ee527ad0ed01716de92b93c6e9628 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ee91f710cf4372f01de5d0ea60749707d2c8e5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3f1031993168ec87fdf05bb9523ada6ae771935 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function 
utils.process_high_school_government_and_politics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d555129a40a64ff90972be9ac8d8a1cdaa6f67ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1216336182fb78fea17a50a8ec1e70f7ff234e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4c2884439ea7c9f44156c5a29cb0455e0fe04fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: 
global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb83ad1e44df9554f4b956a678fa307b4c2e1a14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4bcd53e48b13aa2d8b6357809ad142f415ad01e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..900936eb1b27ad153842e7da9115325a27fb8b2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_statistics diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d54acd65afd3a0cdac8316f6fc3e4b0ccaa43b9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a654fe86568bf5c602d7be916450d54d6ad42a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47bd89009fb74d4c468b6deec54322053c2c83c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml new file 
mode 100644 index 0000000000000000000000000000000000000000..29925c347fe23503b67a4b1738cc8e909f81078a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abe4ef94f0599c875caf5e5dec3b899042dd30e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_international_law diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..751878fe1d333017230e639fda582d65ec304344 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55233f7f20811e1c95650dee1e92862a50b34f54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a11e310c89164737d89c6281f9589c038c2635d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a31b4c268096dc79b15249de064268fec3d4d784 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_management diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22136569ebdb5755787b1bc37e7c74d94febf58c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_marketing diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18fc7a23e7470e44dc15fc47569c083aaecc0e6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b3955a93e27cee34ab96ad17ab688eb22418043 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57095856fa16bed191f32241ee7b8485f7a770f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ed31f8cc97d0d35c0b6e31573c095aba1b847979 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07746d096513d1508cb8b113ef945f40e518f153 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3853e1622e915eed12b6d667fc82b644fa07f49a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b75ac9df71c2b002439dbfeac3e860c06692d07e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function 
utils.process_prehistory +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da8fd46ffe348f81607e2cf4d7072b0955ac0102 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ddd0ab3d121a4311a9eff4533460d1912a1f5239 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6be1ae817fc5f2d1127b05d31ab7b1cb3617a841 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_professional_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cadc7f964caa20ddfa75e9ed057098a52203a6c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72609ea90d020b5faf9fcea99139394e826ae330 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..319123c61992678604578547f13f95249b1fc62b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..dec44c2938c6d3cec700c08e6cc2a0c22d5d1fa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_sociology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a18a3942d6a759c9a31529187f8c84bef3c30833 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b06431e21049838070abeb45458538d5d283d837 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_virology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d9d6b792ca9592b53f60e7a3b9fe52fcee62a42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_es_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/es/utils.py b/lm_eval/tasks/global_mmlu/full/es/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, 
subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml b/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..952259b2fda8a1fa50cfbf36194d8444b8f8c947 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fa +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9edb85405b50f4d975117cc907a23dc8a1d7d8ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fa +task: + - global_mmlu_full_fa_stem + - global_mmlu_full_fa_other + - global_mmlu_full_fa_social_sciences + - global_mmlu_full_fa_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f36ecea5f2b4ff98d998328d66b0087deeb33849 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_humanities +task: + - global_mmlu_full_fa_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd57bb86bb118224258068151bcfb77f91afa321 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_other +task: + - global_mmlu_full_fa_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e7da860f408f871f6d5f55ecf7bdd92c9aa81f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_social_sciences +task: + - global_mmlu_full_fa_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bf2eb01368a6c86ce0fad30e04ee321dce8cc8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_stem +task: + - global_mmlu_full_fa_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1014795f0ffabcb8cbd664a35c4dd1107276dc3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..317705c92aa860af17b86d55acdddc4400d5b1a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45475964110cab623ae40dc71f5e136e9db32fcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c0dd60b702d06470110f6b94c8ef3ea7bbf31b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml 
b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7af0e21f2e84c0c38bd0839c95dd1e00f64f7b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31ae6d71c4e6586b57c1f54fdcbd49c13644ba32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b099f413a68244140b4ae16bb5d9231f4e34555 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07491e5bf7f39a26d409cf15d4613bb7cc537acf --- /dev/null 
+++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..774f6b970e90239efc066e80b544dae0e88edda4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13d6f5a2ac910fcef23865e4215531105932408a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e415b8cf7782d520499d84f0101f4f77c6f3c13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function 
utils.process_college_physics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae47213b39e79bbee367dace28219e446e388a99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3f2ba4ce1c465cbe343c6b358c241e8140d4b18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6cf79a9229c102bd62100c56cd3d3404f7249026 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml 
b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab7aa8584e93622225c370311282c518671fc21c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b83f6ddcf82ebbe572c7e76aa636332f10ff6efe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cab2effa03a184bab3d8486530151642708f6e75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93d11b75b9a4039866238ef565766e69dbfb45eb --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59b6869bd685cb7cabd763d03c3497437c267ddd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d15d4b4ee4336f73450e0415f949be6404050b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a02df4f13acfff661358bcfccb76fea1d0a74adf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_fa_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e18b2c7b6bfe10d4a3c0bc834c5e64c1ef4725d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d94c7e89fb70e84099c6be16ddcb938e10a96e27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e10078959c1e99b93684ed5f9cb17d58cec8d3fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function 
utils.process_high_school_government_and_politics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9ad06337b8e6cdfd3b474d9baf2e2ced4f267a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c733b17def83597d8787fefd9a12df5ed63ba53 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f88f0aa335190798cdc01c9acd0927822c9065f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: 
global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64fdef984a0f16d87645682bf15a4df59f19dddd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c43a115be4261a4ac303ef88a5b0b34471b1837b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebb4e82d7390d9f8496a5b31bb4b07ada19f87fa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_statistics diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0041e33e03b3ba765e14bc5d0e91eee76792382 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66f38f54bbbab488cb994b94e956bbd41772f1fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..95a2addea9f833168e40a19cdbedf741852e96a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml new file 
mode 100644 index 0000000000000000000000000000000000000000..475a71fdbbeb22532ef620ceda4bc70d03445948 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0d6aec2bbcf9c60bbb450ecb4e5fa4fec25a851 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_international_law diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d82bd5a64a031539f02338d163082f2c6bdc026 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8e89d3c7c16793541b93f5ad406a753e81059ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml @@ -0,0 +1,5 @@ 
+# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e4d1a8a7ad94e6ac37a4ca3530cb982e0f9bb50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7e592bae4a02a3b7959964f49f3fc5a58e49c6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_management diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0e7ef1f42c6ec8e42bc7bcb346af99c9f136cdf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_marketing diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c31679ec0ce09c4626045b59dcc0e835d3f28cc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..652d5a3335974f6e32ab58371e8b6dce74d1c54b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16adcb26284ac93d5fcc64cffad556399bd35514 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..92d018f2508759a5eba64ab2a17867321e7e43f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae7e065ecafdad36c7fc421fb05146ef3a8ffee3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd8513da780e3fbc9f9ea604c7b3b4a6800e5bd9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fd6bb3d0a359bef99ff00477d3bb70c0b889bcd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function 
utils.process_prehistory +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99f6c316413309444d15268e4925aabed113e9b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fee460a41ec892865ec1ebc540d91fb4d219abb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13d67d457680de3c7935f0b4b4c2571378a65201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_professional_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e821145e4170de1337027c30b58aa9bc833e443 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de6cc3118d20e7ef0a8ae248466192ed03134eef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64d5fd14a5ae149470df306e0b997372ae863935 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..cf3d9564b1281dd85210e9a08eca428f1cd96bed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_sociology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38d51936c799103492b48f79c1f9effd5d4b0cb8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39c5188d3406d388333cd93ed08a107c78f9d5e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_virology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44e6fc82993da5e4543092433e2b13868281b765 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_fa_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fa/utils.py b/lm_eval/tasks/global_mmlu/full/fa/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, 
subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml b/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..32dc097a1d098439f5bc038ec4fbdba97ea6cdf3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fil +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24fcb6d28e6f24a87b6d9ce7099be206acfc9e8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fil +task: + - global_mmlu_full_fil_stem + - global_mmlu_full_fil_other + - global_mmlu_full_fil_social_sciences + - global_mmlu_full_fil_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..061eb818918e2279668ecc6087e8f76de9a6698c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_humanities +task: + - global_mmlu_full_fil_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + 
version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fea793eff0e83703853feb8b28d5f0f4a24dda7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_other +task: + - global_mmlu_full_fil_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9f793305951ecd337a7522f986bc5a5935f5a16 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_social_sciences +task: + - global_mmlu_full_fil_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e567c70b16de81204a0bca56062d14c4a5fe2da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_stem +task: + - global_mmlu_full_fil_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7eef19d6d1a0c959e3d7bdf60adbc8e02f8e0a48 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e87d8d803487fb44ddd992d57b13bbdd82e54e7e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c258877fca7f68475467dc32a530d0274abc841 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..139f3cccffb7e55ae7128ed615b76c561aa1fc01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fil_other_tasks +task: 
global_mmlu_full_fil_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc160a99f142e662840d33b82d51c867a4068ef5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff6fa3d845e14df911bcdd5ad23da3e63c7588d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61f0df50865a6b95a9237e6ff1bc8ada97660c8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1385b9345d53f99b935764d70b4a63d31bec5af3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..afe15d7d57504b28c4dcf4980b0db8d9971612f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..221289f7d0ec8a5c1d31f0f12f0f688a00fe9195 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..863792b3cf27e931b4d42c0f96f1f3c68267a919 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7971c6065cc62f24e9672417f4bdb812092aa7aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77a75ccf0c4bd02d58b1d9c2a9620e9fcf5e923e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd98fc8db07bb15298c0e7c6bdd82b9c184bd408 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98e48a27cc8962d5bbafcd9a39ed9044b2a374ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eba4149c2e842f4c9902f2052d9cfe47b041d751 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1796059f72356dd890f53bfd842957e38aaa90e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_formal_logic +tag: 
global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96886181c13c2379a0a13d412a981158dc367e2a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93d94120d327f185b11aefc04e58e9e68391c8bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ec56d5bee1b08d299e5bb5f28acc3d921cca59e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_chemistry diff --git 
a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82d86aed2344f5f4e1e19231769a5ebeddf38fae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fcdec0a10b90d8fd61f9a2089f6d3f048e2dc76 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96268192265344046079f9e7abdfb9a40a02bfee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_geography diff --git 
a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f826de3daa160c9ced3f383fda400da34c0b4930 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..104a70881b304fd59ead25e02893891851fa8359 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d499b4d948b2a988af49faade7a275450730dd0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_mathematics diff 
--git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43fcc04d93cd66edeae07a40d69e0497384e5695 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..175f31eef74c214fd11b660c4cff66ae8196e128 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2fc2dd5ca57af8a8c3df6d79cf809380cfbacf26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_psychology diff --git 
a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0540d57c845a41600aa31326745490af328374ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0801af2d0536d6959034c65bd6156e4cd4e088e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..724b7ce81de1764f0b0ba878fd6d93d408941cb7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c2c114123967f126ca40f7a22cf5eca97c3a743 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1672d5b2663ad38baf140903aeece3b5ad87db64 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c5da91c50cc864d0068464ac52c4ad851e3a9ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_international_law diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dea2b20b8f28d320e73a02b23af2736bf3ace1f0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a30c7243f07dd46188b08085c8440724a9da418 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2a7062c232c0165020b3cdd7d9788950fb5a0a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ea5683556e38fc3eb03128709d99fbef2e12cd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function 
utils.process_management +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_management diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82d4490adda68f3db5cb97bf3514d5ac7868d2d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_marketing diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdeb0984f5d5e8b5a18c38bf34a6bfdf67321152 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51c56a3bee09eda53a834b3f712d4ddadefa35e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53148a54249fdfd2104cf5f92357f151fb25b3c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb5fecf2edf0d8d6625d9f27b33a769ff45ef51d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35859dc2da609cf6fd8e498749e747e638d3c459 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc2d414eb1c67c48da44275f4a0b189bf6ee24b5 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abf65fd2fcb66180ed45032df7e65960bd21473b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04ce343642b8af545083b1dc21ad269f512d2c94 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5694cf45bb2db7ae2a0a2dcf60b2d9b696842bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function 
utils.process_professional_law +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7fd0446f901869c9a1141455f9c3d55b56dcc37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9ce14aa062dd5ce11253374373157c24d6160b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdae52986567539b429436b2ff5609a30856307c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fil_social_sciences_tasks +task: 
global_mmlu_full_fil_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a03eec195604894bf1666fbf745699a98f6734c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc0ed0523e6448b4aa339636c460741ee11d3e47 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_sociology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed40afb61259b660310b1a4c7d889997754c6e34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..85ed4d427fb75031c99277423fe9765575d56eff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_virology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ee6bce1834b1a20e3b1d23946e97ff1034cc5c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fil/utils.py b/lm_eval/tasks/global_mmlu/full/fil/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + 
"high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml b/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..47ca79725c1378eb5cef7be5d952cf02751c9e68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e85d6746cbffdc8415a673177cf78892ea9d71ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fr +task: + - global_mmlu_full_fr_stem + - global_mmlu_full_fr_other + - global_mmlu_full_fr_social_sciences + - global_mmlu_full_fr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..697e3a29e03b0a03635f6ba49905682ddbd4698d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_humanities +task: + - global_mmlu_full_fr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b2ada6b32d371f79cedc9539c3ea21a9bb68e79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_other +task: + - global_mmlu_full_fr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml 
b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac7e4605614a20ccd67d8d88172436c105a8098f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_social_sciences +task: + - global_mmlu_full_fr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c81d601f35758afefaa6b804981941f494b8196e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_stem +task: + - global_mmlu_full_fr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf7d76c383a6d8087c010b86e8fe7c5d2883d6b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9a96927c00636d01609cb2564dc5d3fee228638 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _fr_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e4ca5a7691cd4800a24ff7032e1fa66106bdc72 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df3c1fbd6b78ff26410002dfdd525d69d0a6c876 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0daa2e6ee66095c0065e05c3f5f966cb042e9ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e997578893b0bd98fff5ebb2696e4e759a032ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c1c31897fcab74c2eefd8e564c04f5519230a7e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..078108f8c7e73ea0a2bd731648fac024b5d5a042 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..bf2f2940e1ac269c5676cadc3639bd720cae70fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c9ccc80e79accc520c4c865f090ea399897aa40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..01dcea37091e85c421f9c4a3009d5159f69f5ba5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..794f64be1db599f4bd21d60297397901463e6bbf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12c6afc2abd8fb8728d727455f77bd7e77b49c72 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f33ddaba54867b2d651a3896b1d87562efae508 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dcb058564a29b77f441e872340594c0f8b42d56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_electrical_engineering diff 
--git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2658ce96a477c39d7a8fa6f166b73aa0a4bf0797 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5239cb1c194a08fb6adf2b6535ae5e2fcbfb6776 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2763dcb53d53033bf7f6389f0b64d2f361def3e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..2a6a26c9bf4bc956d29fc0c6d7cad66e2b2cbe80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ffacc2982af8fce14dca0d277e2795563dbf966 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d17204224942f109607411c8d47fd1d23148419f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9788e7be702d49a032954c193524636049a1ed39 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e2ff22eae27b956947253cc1200bc2ffaba2bc8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2aba3b6121ec8f3075e1d3873c417c8003b5a962 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21fb1df5b34f62551946632c754c03e6b86ea1ce --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a975d1fcc7cde59838f4e25530e5b4e075a7d455 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff654ff35a003c1c2392d78437c25bfcf28ce772 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4038c956dbeeee5668641b0ae4204d1a91bff2eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml @@ 
-0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a65da78026eadd2ceeae2461884bbec1156f24af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37c75136b2ba62f3ddf2170ef5db426ebf4ddb48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0e123f267894898a64f0cb16ca6e02f4c5e38f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function 
utils.process_high_school_us_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2a9cf6b143899e697e80d2f5cfd0316f6727c5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9e9ece96b7b55d26517e842e719ef28400cab36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eac30d27f9147f22c136b90fdf54f6235b750033 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e15b0fb1064d77e6035e825a34ef13c9162e900 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f42079c92eb665517e9afaeabbc016d5ae2425fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68ebdb717525e90095a2284aa9d0fc95c2018676 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..25a8df3a87ed22e928ece07116306dc3b488f48b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73f7d869215c82216e9f3826f1313a25d2834ec0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_management diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a19b83e9a2855f0dadfcd8c5c33d962ab5733d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d15774f13f7766baa9f4a77a2e11145c75fc1b99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function 
utils.process_medical_genetics +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7519709782e1dd04c51f87a91ef32f403ae4e2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f625921e5d4ebd4136ee0e36d6725a9e856ebf45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4575ca044297eff4da62096a1f4911349c559940 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61521c91da4d5597aa1c31450e1b18a0115f9771 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f771c34254a811960b476324175d5592414c237 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3bcac0f5b900f65de1b502464da1275bf7bb261c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a06a7af518877583fe9f1e1a83eac47afa9f54cd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ecf2e8e7014784d6a6f644894d719b6a36f3df0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..983a2d38053e4beeb5539064de4583e3c9f59017 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59d9aa309b3c175f590251de47f138b34feea201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_fr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d84b7ad07cee145f31c5ec81f7a3abb02abb089c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcd82b7d8d6e088d3b5aed15dbb708891bf75aa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff7b8fd3185d377be099f72e04ba03e21d4839eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_sociology diff --git 
a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d92c20955a118004738b6a7c606dc72a0e7115ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..211c96a79fe7cf5166cd18d10e538c9317537646 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_virology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1f168aed5d7ba6d62ab40da70f71b00916bb620 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fr/utils.py b/lm_eval/tasks/global_mmlu/full/fr/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08a958bbabb85fae3b1a20ebf0c4a81db5962a41 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ha +task: + - global_mmlu_full_ha_stem + - global_mmlu_full_ha_other + - global_mmlu_full_ha_social_sciences + - global_mmlu_full_ha_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84cce38d710ab046922d3b371b68f9b939f4e248 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_humanities +task: + - global_mmlu_full_ha_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73a6ea0dea1b57902569321973002fbfc072453c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_other +task: + - global_mmlu_full_ha_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b520a5dc50a793f2cf5e349baf416b69cf76332 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_social_sciences +task: + - global_mmlu_full_ha_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6213d280b995bbc25881afcc989969965cce1308 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_stem +task: + - global_mmlu_full_ha_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml b/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..8521fe50d463022e4066bec1fd24dd458c48599e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ha +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62ad5e7d7bec45d82452d683a6a7682b65dce287 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ead0f6c11557f972900bbc21a4a94e578e73d93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1616398f862e1ff6b94b6ee28a60596eb5c44110 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1719b0e0d609b3af0175267e071dc9bfa09d87e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dcef5e272db17444b7ccb484be0cbca2d012e181 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f28256946943960f33f8f6b4bac63f62f755284c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73a422e3d790f5e70d6990f3008f6131f518aed6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bdb65c9e0542257eeb7e804a5c33f864b2264f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47e5326cea33d8427b45c5f1c5b7bff2bb455767 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9065f0855ba147b109db2e21f9934cc6f57c4a56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml 
+process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40aa11c5d1f38e6aee1268951603d5fcbd705e3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38d1e9c256f0aa791db1e945d697dd740d7f0f99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7326514a452183cfcc603c31a8dcda1530229fcc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_conceptual_physics diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e865b6bc620b2fbbb79df705cde11f1a982a18c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9457d1bb90ffc2d818a9ca10a762866d2c243c55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e04fb1ff63dfc2d5980103c175d9709e60fd09ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..03c9cbac0617afc136a3ab0a1e11d7d64974f3e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db104be57b25f8efd7f4238ade9ad6f7d757a24a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..729fed2bef95ce20bc22510908d99174ec2865eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13f5621b2fb49aa66933cb9c2a38f8e1f2010065 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1914e1fb86082927733adab77490ffd613c1f93f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa878b037d99406d08fc104b5099366c39eb6ed6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10a136746e32025589512aad8a7e9cdf208ffabe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function 
utils.process_high_school_geography +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eebac40961da29be7dd22d2da3895e7efb7cbf3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a22ab84293fdcdf9991cbca6ec33c70094615d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc681f90927f465b5e1d7e95d3c7e889d4f01248 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function 
utils.process_high_school_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81bb343c37307ce9aaacaf4f55ae3b832a0c1753 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bc4cc4d9d2c487ced03d857086daf7a768a8052 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5d46e5ec0358ce24e3cc92b571d6608a5d8fb29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ha_social_sciences_tasks +task: 
global_mmlu_full_ha_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4848cc314c8038896b463119b1b02e8bfd42378d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a22c79abaa7ba6e9cd575e6898d09debcf4ad5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13882279817265b164fab2e49556770333e673d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_world_history diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51ff436ba4e0fd6d30c9cccccc1d7efbc19f6f1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a36fb8650e36b4a71a5c9d65cf676cc02be0fab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1c9cc1c43e37f85d3fa9e9de402d925e4b9754d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4bc1314ba04e21d323124278e20ae87c4c7b5170 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..259534b9ad217196da25ef9397e136bbcfbfce0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c94a073b9ff6a08893f2bf41b38a690ef95cbb8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..666d47202e2169eb8aea1b15f7407b1851b2c3da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ha_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_management diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9528a1f371cb4df01d394dcbfce4bc1098a2e56c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92f0a408d8079a133586d8936b1fd0b0dd35b477 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc97a8dc9459ce344ff61bc5273c2fea5d152ccf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbcf96c22ddebe8754e274b9d94f3c94aceaea7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa7b426699d3e43acc47f8e2774b0543aff4b992 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b413e4be1fc0e569c7e7289f1dca8c78ef9b7972 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..118e480143d13d4ac69958c2522c6e4364dbb8c2 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a310d02303ceb14d4cb741ee88617082e4bffb1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79536ddc615fbf3d6ed30d28f5b810c8b2a73dad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..613170da1f85dff3253bef9bf217b6f422d80e86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_law +tag: 
global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd65c2330ce63aa9a9a126f41ae77225f3a69171 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf7ecb1f5aaa65e80349bd820fedf5e408348242 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9cba53f902a106782a6d8435690ff10ad8cf2fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_public_relations diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe767686069acbc9e8f0c51bede57ed047a3329d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94f8e311fd7b3da1b82f3e092c9779fef26871be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54f82b3f3cdf00a80867896657a5659f82c4c862 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ce7d224d5c75a43246415ddf9699867c655413ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_virology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67a6d33db7f0124e6261a5f31060982f0a0e9f6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ha/utils.py b/lm_eval/tasks/global_mmlu/full/ha/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + 
"high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff0a5e8f22064d838477595e091ec8dbef001c69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_he +task: + - global_mmlu_full_he_stem + - global_mmlu_full_he_other + - global_mmlu_full_he_social_sciences + - global_mmlu_full_he_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..678ee0d4c308b9e1c3cb492e088f6b7fd881af14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_humanities +task: + - global_mmlu_full_he_humanities_tasks +aggregate_metric_list: + - 
metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c99b4806afbfae831bdfc9e0248aea0e5bc7ce71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_other +task: + - global_mmlu_full_he_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12906895890f10327ca750fc03a6f1a326e59a8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_social_sciences +task: + - global_mmlu_full_he_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6e76e7a87c10d74a947fdb40107148244c582e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_stem +task: + - global_mmlu_full_he_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml b/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6ec9fc82a8b0747d0042e4654a724b872a1becc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml @@ -0,0 +1,16 @@ 
+dataset_path: CohereForAI/Global-MMLU +dataset_name: he +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb197c3dee961941080d5b88f1febd1abf6a43b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ab9ee20e2df2ad2769a4ab644f94fb05f788de4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8950b1e039cffddac8bf1182c0ccd3c9be4c517c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_he_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c114348b85622a32550d967b55c20f916ddc0eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1324a04dd97d72f8c44b7d659b4ceea1b665e566 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cecddc603f45eb35cfe4ab80cdbd8c27cffa4681 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_biology diff --git 
a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c0f8b5d1c1e4176d149b3e90653aa1478101ea3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4c36a41443d998c85a309c154316eaeb19c86f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3633d53740d18674d0e8505944628df47fe1b631 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml new file 
mode 100644 index 0000000000000000000000000000000000000000..a28c592e502b2a999d8978e3173d6a3631203d2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3893b9aa04956d4641327bf1bb53e175df65e9d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4167874ea9a8832ea099fcf860d45acd975599cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ee92851a474e6f93ab2fd4b9f4d6d4ea5b5ba77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ceb32776617c04b74f1cf0897ff85618706ac6a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00658e28e44c99519557fc8f94cd6754386aa8f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10f2ac18dd7c0de14e1b86d28cfacce1325be8ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_he_stem_tasks +task: 
global_mmlu_full_he_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aac3f8dab339e87450c248d5ad7f5f2a7ae4eb98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..299a73efd25f09c7a5f3d711ec5aae57c2fa3fa3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d3ba89356a66c2b305d097e1fb353fe11903338 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..f67f8ef3d850e5a2f17a55a8fd4fb4ab9db25c1d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ca8b6f86de781c93eadcf313ccbf8127d8df982 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58e4081a54b25820f74518200f302130aabcbe77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..2d76e3879d443b2247886eeef09bddc42d5781c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e374511047550cf157d05e52709b408b7a8100db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa0b7c711d20a4527886dd67260fa83b2bc760f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..7f78a5c59f0ceb158b74ef7d08c22edf3d518a41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15be9243320a614d9c5f04014ceadef1b4d1d50b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f309c0bdf72c16ee2ef039f38fd964b0f6473ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ae831c6048da958d651349f7e9e802c01fc78ec --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a2e817040d3c3783a88c9b4e2fe734f8560123c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c05da45aac6dba2333c46e2e7fb108d87fc8198d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b818e4fecc02f412354cb8345ecd969af36748a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49f7ce5dad10a20012dab1ffcce2c26243227004 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91d085670e39b875eb15a8f5d9f2a3e4aea1c9c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1bedb4f42b654c195491149b1bf8a33c169ee62c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39fe15a2bd819eef516f62dcb4582a2d0044414c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e54b58b33115f87f1f46c8d49c96c7e2753b1cf3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8190e96ace0c789304a56e2ea91660eb437aeec4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..d5811f804315c644c71bd0043d9f7726c78aa404 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_management diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fe44232ba6fd70a2d2dde0ada318dfce3d38298 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_marketing diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c9082c5f5482d7a66fadb34c240b333748bc7d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc419deea25f0070c98939f0fc2e274531b4a8bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function 
utils.process_miscellaneous +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d889642bea8454eb5c7aa58fe8a7f7c9f15daa7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1155482347a62cb9e36546c567a3f0a84edd926d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30d49701b51d7dc1942b264143d42b2dba1103c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml 
new file mode 100644 index 0000000000000000000000000000000000000000..458632de43041ebbfe396014a8a72e96a4d3e499 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..938356731ca520c0cd0f80735d510c4cecba4f98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aed2863685feccdc5acf230100c96ae90492c937 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38a9e3cc2e44e6f0b68205a6251fbe7c0fe8c822 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8ca950c549efaef2a513a48999f8b44228dc4dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f82c289238a8b7f11f0cd70badf857cea4bab501 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3aff6615fe0726df00a3ac8913f815852421886 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_public_relations +tag: 
global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e99aa0154cda8d17d602c4b2016e39166df1b4c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de81b92c9f98341d26ebd762f5fbebf51c941265 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_sociology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7be650440d82469e4a57826af165f71b2706ef73 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6f51e1b93a51544a8cfec7f37f0cde169e6a9ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_virology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3d10a0d52724e6f969bfd3f0f785592f3806d66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/he/utils.py b/lm_eval/tasks/global_mmlu/full/he/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + 
"high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed54a6ad28cef3db46b1503dd713b43289379c9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_hi +task: + - global_mmlu_full_hi_stem + - global_mmlu_full_hi_other + - global_mmlu_full_hi_social_sciences + - global_mmlu_full_hi_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36492fa3fc1234f1b9996ede7da4f88aa7c76b8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml @@ -0,0 +1,8 @@ +group: 
global_mmlu_full_hi_humanities +task: + - global_mmlu_full_hi_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08dc16b0f679ba0948cf4acdc1e46f23adfed45d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_other +task: + - global_mmlu_full_hi_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a4dfdd726d67dd16dab87e6fcda796eba0709b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_social_sciences +task: + - global_mmlu_full_hi_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a0123ae4a892a3b73962c68774353bb5e54154e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_stem +task: + - global_mmlu_full_hi_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml b/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..18c6286eb4489c2763f1d1c82170c20deff664c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: hi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f239f067d718c93b757df84a058911fad7113d60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dfcd776ed8cff6429d2646b068f585dfa8d50254 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..dbb6763d3e2e00df332526f7452f96bfe28892be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5882427e711ef090cddc5ed6fa9ba10a9493a3b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b0c6c3b0babc254cd924ccdefabbc2e62b589fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5326c8dfb2fd48a423aecef01fe5a42c89e01de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_hi_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf9e21300a6017ceff10fe8dc03b9a2c4aa4314f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c79f4250c810f6e924f51d0bd46355ba1c6d7a52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e8b0427b43eb4f64661b646aa8795776dfbdfe5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e8c0df2cffdc21b45426faf65d2aaa91e4fe05c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5fe337ee1fa73fcaddb77fc9979d0a0fa568ed63 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..029a02e07af38d65273bbf4d8fe8edaa53d455e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a674897423d88080d92dd1e7479c81c96396715e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..355053b25b7236db96c18061c5bbb405bbeb79b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04dca10d70fe4a2691e9322fa1fe77af1bda365f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca7a30832596a2a0e131a02c22ebbaeb43cb1fa6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml @@ 
-0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae534fa62f271be4d8b940342f50aea81f7ce2eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..096fd58bca90fdb6b8683c2e9b96eb62569e19e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ef04ee573cc7d023669de996bbeee1a7e970349 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_biology diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e8913e5217d8f0ef9244338d8c33ab7deb992d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..180eef7563b937ebb38335a9c48f1750c38025cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32abd63b6e3f1e57495347cf054c739604728d8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_european_history diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1089908b082a3d6b3d93572850014ac8c16a6c68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb22bb5106a66ec4a5214a00d348c4fa90eaa2a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..affc27c0483782244eec39a806a1933ae2213bf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_macroeconomics diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59f97c949e8707c0e142309684c907479b108cd9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7506a4cf2fadf837cc6117dcdb557e6f251c839 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..406035bd423464a4af5061f2cfa77318654feb9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5c2be371487841b9e03f7da21f65dee262ec0eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a955febe331ce3cd339dba9d4ce9240561f4cfb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a5573f8b315a79b0b5d1a9df64b1b8046adcfb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..38ce46800daee48bcb3eb6f1913c691b9295e5b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2486301f8c7b6ab5123c77bf8fcb2073cd2d4e64 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f889885b32e68a52026110e11f1e440cb04d642 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cb0d834e5d7f54ac1aa316a459cfa2d8e34797b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_international_law diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..113291306246243d6ac0fda1416829fe38656877 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e22cedbe2a477bbf7dfdb3f4eedeb78e3ea22d66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..134ab0806eeffc0d183fe38dd8648edbdf47e5aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_machine_learning diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e523b5d1a0f00cfcc192157c30e42ce849eeacdc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_management diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11d8930b739dceda508860ff3aa8164f0cf5355b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_marketing diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad38e3e62fe7ca1b10160a3a579f243529eca7ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c31f888329cc7ffc34a597b43b02c009e476352c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..01145f6f74bbeb7972d21252acb9ac222c79ad7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4acbb127b8f67fb74ad04e702538141ee5d2ca5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..889c0018207be7de113e0dba8b4d26565f91952c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_hi_other_tasks +task: 
global_mmlu_full_hi_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a8aeb4d3cc93d4f8ca29e2b383e65cea853a61c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad80a3c086f2d069a4226834f1cb0ec4700635c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f5477899b53f51f03731608130cf564c92b886f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..836d577d300acb9020fc49e76d746df5c8006763 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a8e7db926b0f687baf4c6314da45d8aa651d7ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4ebc1a5f0f9c86713d68524e448e97d066da95e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bbf959ccbedb6b2d79322af80db22ee05b14cd0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7faa9d43d9c82f0a35e6831aed097a3d82d44893 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0ca49ae208ad46ed4d23aec781a3adee1e6eb4e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_sociology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5fd9f0ffeaff1811a9d1934170e4c0a867445d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: 
global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..843ea25409dfc301b5420a630d50bfcd9a758e91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_virology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5e56ce044ac3cebff906ee21e65968c480732c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/hi/utils.py b/lm_eval/tasks/global_mmlu/full/hi/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + 
"high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f678660e38ad181ea40066ba0caa99eb98c87a76 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_id +task: + - global_mmlu_full_id_stem + - global_mmlu_full_id_other + - global_mmlu_full_id_social_sciences + - global_mmlu_full_id_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..b9283f55ff308bdb51432c2d300bcc22da015596 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_humanities +task: + - global_mmlu_full_id_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74de0f3607f3c32044a4ad5ecb3d67c67cb29c49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_other +task: + - global_mmlu_full_id_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8656b6bedca7e885e01a03871f183c9749665e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_social_sciences +task: + - global_mmlu_full_id_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0e472764f68eb67064c4e11d99b0388e67dec74 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_stem +task: + - global_mmlu_full_id_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml b/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..32d9dc92ae31059320ceaf50425421675c6acaa9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: id +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b18c1cd7f5ebe0a1e22e4172dce1dba6d1f95d73 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65b83d9d750826041b8f0d024daef58d01e5b804 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11f1047c113fc9d0415c003b35c15c3f246f5b60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ed992f22d79bbc0e6134cddeda647e9645c1e8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8baa424fa121882c672c5a8d091268e817cf7a50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67b9c935b942d4cdf0ba35264938695061efa321 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3eb5d22804612c2e93a3f580936520c14ce1866d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1462945be52edd5806de1951a35f5285a96c56a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..980627920d2606e5cb3e2485eafebd7fa7f3dcc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: 
!function utils.process_college_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a2736e891571b829aa205a74b51689df613c8aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb88c3f8cffd13bb80cb56586f94f44577ccff01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9764ac3efdd13ec63afd28ee8f90e082e6ad9780 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c70c111caf02dd6276ce1af666f98df440e9e6f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f82a74b24a4e9c72b097bb8858c675d01536f37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3cc2dfba0da0b90620b5fbd9624d839dd604fa55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1d511b4baeb397cdbd7840d00c3e461b00b29e0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c6cef13a7346b7aaf7016cbb4118f9d3df9abee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e7a44daa636b07870c61ce1639db26422b7b446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d39c31abb9ce693788fff22dc2d8b86269d6bee3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _id_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d92d827af1eceb0648c47e2f36bcc03d9eea7826 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff714ac846b2770c34ae7adbda94e92f65874044 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d5c8141851b15325ed0e89740c277c836aa9593 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_european_history 
+tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ad392b3ecc4fcf43906dbfef56ad504e40b34b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..850d6d828250476623396242235b809761eb6ca6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1fda5c65ae4b91f0f2c3ded46daeea8d5297cd0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: 
global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a628ed9a53356ea6adb37bf4e8134458984d8d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2c447077f9ebd7e127d0c57d601af3fbe0b9234 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75888a3d00e82373b6eec1e63fbe4628c3aec546 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_physics diff --git 
a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a6ff54ba6b995cf182a942d7c81b5f0b2ecbee3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab2058026d71b55308832cb07155b60b826859db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dee8c3192c4edb818e01acf2dd4e00f1be27e24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5474c8bacb189560886b46624c8eb0d37154255e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..464ac67f7f857e02ddd204abdcd3fbea8ab667be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..518cb30c4e107c0da6da587889d999a4a91c6f42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90262ada02b54825d78cea5586f5782e396b4615 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_international_law diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8727ab4916766bbafa4de49e1c900faadcdda4d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da2c8e6c235015ec9f2a044cc836df650c5cc32a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84a30d9d8be85a0c202fbd31ad4573be6b4a759f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdd340bd88736e9b57544c865286c8b4cf13e29e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_management diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..caf3eb0f29f2bfe53f523d7755393e2a2c9d589d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_marketing diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d649fd3e41202ec6bf77bcacd7d388e4a2de55e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..0811f1b06f88bd4b8151821fe6a918a4206c52d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a124ded50f54ef01d737e47b82b54f1cd633ece --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65dfaea7266ee9c31deb2b19413e283b6457ae9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..804ffc602c974548470d4f80f08a39bc63d001e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_id_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88b37de87a71511f81dad12955392b9249d4dfa8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e851c4929704957fd4d784f4e0902573f8994bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d45c9517413020f62e8e4dd8f4a751b40afdb2ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..965cbad60dfcbb7b8d9fe5cfb856f996512126b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdd02d534ab32e2afa85e3ec2621268e98920905 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8d294b27045f1978b60c4149dc95ca9ab12fd33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..8f772b0cb978831dddfaf8737b28676bd411e82f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a73d36bfb2a02ec092fadef29175569cb2e88d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..715e5c31e635458466de832de462b465d64ab044 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_sociology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59e147aea00069f3138f1ad8c1a561d641755854 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50225ab5f34367f75ede434f7e5f687045f84eb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_virology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0193d12d0bc16e6437b8b105ae6b52af9a42b1fa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/id/utils.py b/lm_eval/tasks/global_mmlu/full/id/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + 
"electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a263e29556cfe4f2f771a15c8086c7c8c2903c45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ig +task: + - global_mmlu_full_ig_stem + - global_mmlu_full_ig_other + - global_mmlu_full_ig_social_sciences + - global_mmlu_full_ig_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml 
b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c6ffb6121eefeba7f838bb111e7f019d6038727 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_humanities +task: + - global_mmlu_full_ig_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..214efed225785ada97a2a0219c8d11fbcbdca191 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_other +task: + - global_mmlu_full_ig_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e27fe1fa179b32b951e30a4f195137a9ee6144d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_social_sciences +task: + - global_mmlu_full_ig_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5dd33b62016ebe64174bd8626a9142168e3d7cf3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_stem +task: + - global_mmlu_full_ig_stem_tasks 
+aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml b/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..0832c633b740ea5174f877023cbb5002de8eb1d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ig +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1dbf6c8343d742e5bcb8c2db7ba386d82472864d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dc198c25fa1a3241569b6af84f55172ad16d4aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_anatomy diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..078069eb22ea1ffc5ddddb63e8a129511df20c09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f075e7404f9586fe4db598844e631784f7a4f4ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d41779ade0a3af33435afd77c5832eea080a445e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..5f0e57052c224bf00637d7a40474506d6e31ce29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78e25dc8d08b54118d5ea0a7adb60e04470f3a36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9894a454777e0761c57c2d35ee3468a5cccb1a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8976041fd6471fa8f37675e68411c959457f09a6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5edaf0d5609815ef6d84b422b4ff80ac2f6bc566 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e55c01cbd753200f3e07f3ccedf261160beaa61c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ee7564cbe4b5b4f99424a5b88df70414873ce0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..555d4fa83db09031ce9961777b4753524b1e5ccd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..783804b889a15aa27c8f642267d21ef6630068c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..789f95d20a29de34b60228d87934613d7d2fa134 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a5c9d2c5551bd384894e4d5d889d11d21bc04c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f9e426ca37bb8f3b0687dc58a5b2763bf24dd1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9b7955c66251e04e5db55bae6927474c3dc065e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..368bc71ddc19dd3702a0fa4bdd29cc0cef712769 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ce77e108b330987057aff3d2cc898aa5fb9b9c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d859f390f2fd59a9e09c6b608a6aa170ab8cbbef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29a93f460b4fc085a1919ba23be8d64fed83e726 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74194a44a8f79f4ad414ad2840767d2248cefa2e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd53504d09d28babcb54e238ffaa4b1e82664ae9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30244a64c3355feeecb73a0077d2a857d7b1a75d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..737c0a56c71f6a19b55d72141a2f8f3d815dbc49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5a2220ccbec2817da79f2f42066c1af4f9a704f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7d4c537f50055076f263979c4a3fa5e23852f1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3051f013f1ca6d6aee0714e5722fc055a1b1697 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d48410323b57abe316a6ac7beb9f5e331d93c932 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61e124fcfc8810f3023f59bb180997052ced4ee6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d83a63d94732743d85ff87aee3f31aaa96d52eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..787e3151fdb9a57b3c4e92e32a04930c2319072d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c61845911f8ad941f86774f687b17a2abaa6c9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..3a8511d215406b568862c08e3e9f5d05effb6bb5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46254ea14c3636103dd882d2f61d666809725b39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bce7502915d3774927d494fabf2338f5e7c22eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93c87fbe662456de254a891f330e5b83490d2008 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..780e1c894ac9987e841e9976cc1245e481636878 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_management diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d30ece9a83aa81275a15b79766f25dc8979925e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cac197c78e0d813636a24dcfa3adf2d36f360292 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a38245103d61b773987ccdb1ffc2235704adb796 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc545d84ceff698ad94e4da7ebcf114f0d3b8a44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60ad22feb3a862a7fbda02ed3e8b3e916c27e8ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3cc556070e3aa52a43953dba17d58e719f5cdeb8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f6556324598955b38ba5bd6d516a2aa5975daa7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db4affcff562cc7f26b54ebc6504966ec7958898 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18d3577349f279b26f95790282d1eac115e1aee7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ig_other_tasks +task: 
global_mmlu_full_ig_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9db41d007aa732ddace06d4a43befb8c93fe8a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fa28b603d3e7617b207cccd464c83007b9b4a02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..639be38138523058e2ca9756ecc97209eae07eeb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d31af09f4db98c7826584f80a25494ea75806eec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..200db46b00cd5e83afa3a136c08a03438f36309e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65a3e4e1a85bb47e638b3ae42358f37ba9e9e17a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff0b0505f52ec9435bd5dcd5f9e63ac6ab0317bc --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b437c82f31a01686ddda4e25b14f28e7bb24a8fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_virology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fbc7cfd8237240d2596901daf74f316f0ba5cc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ig/utils.py b/lm_eval/tasks/global_mmlu/full/ig/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dabb89870806e5a7ffab9ddce14619b8eaee9253 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_it +task: + - global_mmlu_full_it_stem + - global_mmlu_full_it_other + - global_mmlu_full_it_social_sciences + - global_mmlu_full_it_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d072ccc57e1c027d5b6038eb94715f01c916931 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_humanities +task: + - global_mmlu_full_it_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99fe18cda989217594ddd42883c3c0b2870f29fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_other +task: + - global_mmlu_full_it_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15a457a5592ab4c8a38c67c828e98c67896a3a48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_social_sciences +task: + - global_mmlu_full_it_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf7a555dda5f3a6a88e50703448b4063fdeeb99f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_it_stem +task: + - global_mmlu_full_it_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml b/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..4798e10a9a0eb7132c276c522aa62fd2f256a2c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: it +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7351c1eea56ad7d91eef0bc87141d7da1cd0004 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..436cd3f661051384d834f12675aa399aa069052e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f98f0f20edd395b148f5d5f39bb60bb9f7f1d796 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9d931feddf665778f78091cfd672f7c0a4c013a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe4290240ec5ffec9c0d0f95804efa55c595b00e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..71b8f45e2b8368c0be2fe2a6d8b26f5bc30ff95a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d29bd758518c8ddc7a412f4abbcc613ba5e0fe8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f740d259df43bea0ce866be2340e88fa96151b75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7568fb7e67f44f4755c81dff9cc008807a956ee7 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bfc5ac1ba55fed2628a9702b544a1288277b444 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2101847ec748fff0cd4c5dd694a12b38a9a3b468 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70b31f9a81bd66aba60f539c4891e570c8816237 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8917d409237b260f8e8428e56cff1629db8e041 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a49352fb72ab2b57424961b5348f29cfe2e3889c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27f0c6c330cba260e88a2d9bc8a4aade6c13357c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd78a52e84b734b46e754eab94dae7a5f32a7ccc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8171fcf18276aa2fa1c0bdfcabc19b639654ee89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a952ed444b63398acc137d3ecce77de89fbb21d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..939ba7525bb022b60211221597c1ca3eeadf0223 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4524d4dc3259c52e9f7cd0e339489529781c315e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2dfb1649fc16f0eeec93994333264e84f2a3f3fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..556aaf20a319458b6e4d3a4bdd13868af84b61dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c1d5b6008944c8fc498a1b873b8f7ff87e8579c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a35b6bac89dffb1337f7782e91a0119d288178e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74c01ccd4481afc27dbc485cb5b09b7cfff43077 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6bec02c4ca085d2e6529c0fa0ab7ae57fc1d06f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..551a0f8da6f8e35b523756572f73bf7a2e0bb27e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3cf7144bdda5e118f79bd78b683eab29df2c8325 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17088e513c2d51ec3ee51f3cd4713a76bdb05440 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3f35f991c13b7243175f44ddc87f7c1c3c4dcbf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af222877bc8b043b59d88431074038a8b136b0d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..698ddb5fc3913b21ad05a94c5e875179b44e0698 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ff49730e275d7a68041039ace5f155b804ea8f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58d32fa4afda0cf544b6da454cfbf508dc4af253 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..d7c47e55d4f495dea81647b1b4a460282706de77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_international_law diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e100c0e9255e85c15f3fc87267f1228a74f06ddd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a07444a85b407c3acac51913c97eaf604bfa5d7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bfd3b7a5ab60d33fd4dad138a5ea501f3538ff15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b5feeacfc53a4bf7df581d5a7907344ba52450e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_management diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d50b46f44e9fa6b7d5287143e01b05c07b800ea7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_marketing diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b02316caa27431e6cb90aec3cfea45efc89c85d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b638b50de1cbcc90c682eb0017fd89f74df2a040 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..520a8beaf092facdd2bb080addebf312d9d952aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abfc739509650b46ab19238d23e131f8bef6daf9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cac74152758293800245078fe7168d680c213d41 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1d94976dcda73baf2bab8d6e910c5d97062d415 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74bdec821927ce39c943da94f5a7f4c5d6e1bb06 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acf999a2947ad8231d79399cc24422f7e6b8b851 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_it_other_tasks +task: 
global_mmlu_full_it_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ec4f58e3675b456e3ebedaa36c49ce45f9a4560 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b53cdacb8fecbd3860503d04ea8fdb78f86c860 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b7a24f390d50da9ca46270f5a156f6b7c6f18d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..727cf4b6e30a1c91484ff8afdeb6a30f04af79bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90fd186c56ea787eb26099c0e58950d79472ed28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbc77935aa4d4692a638506b2cd7faae8441f391 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_sociology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2a923b6b2a3f3abf9ed04d6538f4aa62845dd0c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72758a56c4df5f38c667ed0ccd2fe348a24a8ba7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_virology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4491c51bc8e3e781c2df0fe1902a13993d9afd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/it/utils.py b/lm_eval/tasks/global_mmlu/full/it/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml new file mode 100644 index 0000000000000000000000000000000000000000..103460d7e997c00d21f161d0b10d29784ff61590 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ja +task: + - global_mmlu_full_ja_stem + - global_mmlu_full_ja_other + - global_mmlu_full_ja_social_sciences + - global_mmlu_full_ja_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a063eb0b663cacbba1b79afc5502efe5e5681ee6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_humanities +task: + - global_mmlu_full_ja_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f9b95ed6da3974a58fbd81cd0b664271ea1a022 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_other +task: + - global_mmlu_full_ja_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4207fea43fd0cdace8edf8182bcac5b57d1e6b24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_social_sciences +task: + - global_mmlu_full_ja_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ca6ed1c06a4fd65c075816082fc9bf2abd5b0f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_ja_stem +task: + - global_mmlu_full_ja_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml b/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..591725e38e184cb3d173071b0d9aeea97948bcc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ja +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b65a75be2476d4dc8bc1244d7b26f8c04f8ad99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e735aa349df3f7c8bfd787044b2061add9057093 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0ba89475b74e0f3d4492db08baefc458bd9656c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c39d286c7ffba693f61da3749be7bc43b03d858e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27d09b8887a4138f9385c89b8e4ba57cd24979c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..15e26a51a3ba91a7397adf477b151d7043a8841a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52c92423e7159ddeed225b000a50a1cb28ec8236 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a91a7d61717a3c2ca053a826efc3026234aab59f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67dcd2a02289efdd3cbd150bacb2f43d2cd83d25 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c55ab2a25dee6fa229f5488a423cf276ed981291 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5413c86d5602d40aa982466a1dbbdda5a5589e10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..276f214e794fcd316ec34659bd0082d0b5d491e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f823ac44b333e51e4570ea77784cd02818d0a4ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbc6846a790eba71d2f5b6d763a4f0fd75c5bc49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba7295759a40b51bf0d066b8081258f7a87a24d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..138071048c21e33f974483b47ddab8eefe7be743 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d88d5685abdca8a4bc9a36b4759820fd79b6a922 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64cb2b9ee4f6840e17722208a2cb07f65d6a7e75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b88adf90587b18715066659e4c692c4017188e10 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eef67cc7773890154ae12dfe0d716c3883bab38b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c90e5fb76b61a6a581b6526bb448b3c8a678747a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8318099ac99a420effc370650e14441de386fb77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ed5a6209bb1330861a4da399a63a4c87696901c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ec0ab8422873fbdebd63e138ea80294c747dab3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4bdd9555d65c540bd4a6277853b7b8b2c6c407ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3abfd81b1ca3bb9a7e7833a10d8d67535b24a150 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..483161a60914cab158c8b824567d25261701aaf8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..702092af446264025bc997c8b8ceca537626df03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b7ce92eece5a40aa9ef4bd2e10a9f970cba3362 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c68acb8e827ea1dc1415d8ddaed18ccb5b34b197 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1b91833201575b2dd8f1a049a04a1bc5826c208 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2dee1f89e90044d9cec96388b8477ec723b1622b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3612a7eec71d04f51167b53c0d230fda75bbae37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b70204fbcd7d0e89a99a532ab220f19151be54bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..77ed3c97d7e0d49191119fd8158005e84e5d29a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f8fbb261f7e6ac1b4e3785e466c29bbe7b4d6446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58d4afccbebf5c4150409d701159ab2d4a30f8b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e664390f0bf96f91706a04f6b89edf2dd761bef6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf495ae2272d991ad46dc0b15e4cc4956bad00ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_management diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1349771effd8889c35bf139f78d1a82913f149a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b513ac4f22e577808d87f45140a3876f3dbdf14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81659bf774474bbeab8a6ef4f30030da1155ed82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e77694bdac7e0ffef6457e1e0ad2cd0a3673cb8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f322376dae0acca5b0c32bd649545ba7d225f35a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d58fb0e60c1990541e5b2eea85d74aef9c23b66 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..238653619ffdfc30b0b61435434faff5fda992b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a044bf99c52e926965669c9341991a0dcc9f09ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b828e0e28bd24315cb2b295cd96463bcf22ba930 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ja_other_tasks +task: 
global_mmlu_full_ja_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7aafb6c4cc3d41d8d9e005f67f749d0964a56530 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0cf9905e29139af85f77696a79e20f768a1d183 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5ef36c18369bffbd5d74a457398b28fc5142ae0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..565439e6d7f3ef9380452f7f946813a4ec417972 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7d21bd64a5770ad38531599a3b0aae3ca33b120 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5cc44c1c12a4e077cef78f08fc7815067a0d3190 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ebdb14a8c2a008b610419ee51476d8cb0402b75 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6f833672faa6d6cc43225f43cae9089c0c1cbd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_virology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23e66e063640e906f4c31f7fded8b0ba7fbfad08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ja/utils.py b/lm_eval/tasks/global_mmlu/full/ja/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2225e2309e8ec6b3f33ef1d85f3c719f9bb34ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ko +task: + - global_mmlu_full_ko_stem + - global_mmlu_full_ko_other + - global_mmlu_full_ko_social_sciences + - global_mmlu_full_ko_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7690643456b274d6a2f07a4693ceb2371eae4e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_humanities +task: + - global_mmlu_full_ko_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8990ae957ca2bf03544f30495f92d95d641e0e70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_other +task: + - global_mmlu_full_ko_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0bbfad7f24acb3042b85c0f8825f51f6e298a6eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_social_sciences +task: + - global_mmlu_full_ko_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18b7f17bae7d6a67d027342661184672a0b95a32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_ko_stem +task: + - global_mmlu_full_ko_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml b/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..11700a2611eea6353ec3405e5dbaaf3269d53fed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ko +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5959d7881bc0366bcabb2c0144f761d8c4858996 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebb90860fe61eea9a47a5f6b0561b09a3d194338 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..670846b327253b15226b264abfc3ad5f76e46d00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a44e430211b6f47e042802ebd01d7b9ca5d7c04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9e29697c08bf5e7be4aa89032581826e24fdc10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..fc3644680d3e97f82d4fbc801fa0feca11136f68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2eb0f416f3f3b0db9da85a702cd0df96bc30fbfb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..044f1eff5a42751ada4ff2d05feba727ea68415e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9929097ce772a874064b4376f4556b446b048a86 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b78c24e6fa2419591a3c072b19dd7133a168aaa2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20c3fb20a57b1cbdc87e15aa8016e01632135551 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f95457249b51a31d715ed22a9b39a37d39ee93f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f79989750da6c136e7971c6afe2f0c7c5febaea0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79c35ed7f98c8cd53855ccbcab54b64c62e0ce54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1444a2495a807e9fc826d2b2f6624fd90956bb35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bec91b9913b00714dc15978b6f4dd69c546099a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cf3109238c6074386ea0d9a30abdd3b489661bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a5f7bd5403deb9e6ea8515c9c4e7c671f120635 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdaed574d0b464fdc393f49be834a5fdb06375fb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..193a064c984f847b6dc679cffa6cc0223fa30738 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d2ad648afc5763de6fb05e614043a61939a3f0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a48b602dd0b762a16bac7801a26c8cef0b819fad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc9c20eb6a430de9c445b55a7310870948c6737c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e86a27fa5c2f44f2cbcbafbf9680a0032bb91016 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b947f7f3f451ac902ca4e9d5ec036efccf7b81c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9184ad9ced39c8a52377da4eab6b856875d8fa6f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50b6a150332ca02078913fe01226fc1341944ef5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..974e3b03da1722e347b4e90c18762232238a830f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e617e8cdd6144066c57ece957ca27f0e1493ffba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a0105961c9365ad342dbc006c8425ac868a67f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a696675df6951a6d882b89e6c0da31fa7fe78c82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eca86cbe496ee634f735dd2d6b2daf3557320f7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69e3a2df40e209b8105e28c08c1eb63656fb4917 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed3e99fc51a7d31798eca2b3b18487fd92d4866f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..651f389c20d99fd93cab22351d71ca13ab890f87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..001807ebcabd19691bfc34597eb9d0528a7a549f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..01eec477c5c460e6334255f3463124604fd0e174 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1126c6b7ec6683b245df8f76d16bef755640eed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b8332705879711a026ea9df177fdb51f8bad090 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_management diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3cce25c098de00ed09758aa874658822776f0b7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65df17861b5182a5c75e90fde31646dd068a37ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04b71e2a702f13002c84198f4d531ab07fe91d45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f1e7fa1bffacd485db4c9d5c556e331c63c7abe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c657543aa52eb0e75a7d2a64fa959a02c3a3a478 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dff6450f921d469568cc3f1d3e7655e090b2bd0f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21f058af0efa215550dd81539e55c44fbe63fb6a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56aedae91fa5a7bc214dbb58c2ad3eae52694b33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24f83b232ec63d61785c9a4487d597295702800b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ko_other_tasks +task: 
global_mmlu_full_ko_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ece9dc5c6ee09e19f6d02523b1a38feaa62f06c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..439309570bb0360304ddc46e39356cdb8ebfab7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98ff6520d1b3037f8184afd6da45b1b8ac2ab233 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a5b07f7e31cdac2c3417e9ae1c6de2bdf46bd42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3663391a8fff81cf98712224e039b83b389caa85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..902b4443e2c696a992c8d4089604e7da23adeb3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36e1794ceb2bbb5737717d7cb8bbc2084d1e861f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64b58d6a95907577fd3f072cb0348bca59c80656 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_virology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7289671f5c4f6942fa770a835fddd60f3ab4fade --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ko/utils.py b/lm_eval/tasks/global_mmlu/full/ko/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4774599a13e150ffe011af036e6bbda0d4901496 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ky +task: + - global_mmlu_full_ky_stem + - global_mmlu_full_ky_other + - global_mmlu_full_ky_social_sciences + - global_mmlu_full_ky_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e0368c2b6c777c29b37518cf7f574ae06a703d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_humanities +task: + - global_mmlu_full_ky_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1bfc89ab6777fbf1380bc4ec506ac2e6ccaadd32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_other +task: + - global_mmlu_full_ky_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae756c4bc364d0aef2a8b093b3b27f1d603b817 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_social_sciences +task: + - global_mmlu_full_ky_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..817456fcb2c31df7614c2dc8973b9aac83ed92c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_ky_stem +task: + - global_mmlu_full_ky_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml b/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..63f88823861ee5dcb129d843dc1ee45095ed93e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ky +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21338a569ce25b419e959f73732269d734a19d8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df2635483c6c407c8756bd25151143f87fece8c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e0f6abab0550e86425933ae919f5f185359c2ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17656dc6362b31b43686f1d0c6640219365a0854 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c053b884c0029da3bb76dc1ca3f00c9a0df0383 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..3649210635296607da1a355402b61731c1d064de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb9f858681b003e4cc7670866ee8c99dedb63e3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4b15b5401fb69a53a16b73369019578d4c656b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5657b66bce4b9108fff6a0e851c827ea1e3023a --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1e0c25e6c4f9685fb37b4aa4446615714f15f04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fac1d80f31fe382a056a9a2be7d60208c7d413bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e35718d028c1bf3aa1a1be071e5441ff42293b0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f165ec61962550c284827e68cc905d22f31568ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48670c7f8d17747e03a11603c61f563ca717fba9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29d241428f1984fabb2071cd248ead66bcd99ff7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9aa7f81b48588e4c43d03108a5af42ab13c431ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70a5bd8603a71d3f9d57ca9265f68d1410bc70cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f678c0d130bd4de90abb3016fbdbc18e7384eab7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..750bc68b8c8c769f35832e610a1f8c11d9cf15a4 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7700e37fa9d1118af9916cfabf9564c44e07894d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c805fc4e91c7d29dbb2bd33ecfea8c8c116639cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..01c67f8e332cd612a810f4801e3cb16211e7eb97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccc5c8b73d1d1e70ad5c12504acf77fcd15c0273 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02ea66ef6b4be7105afb290010e535be8244e7d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f693296dbef784f629fabb544cc95ba3f7dd4630 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b05e279925c8fc21efd67700618d830a5eea9ddf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d596290fa3ad19f7641a0dd178291f00e21d341c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f71865c250b4424f3b4800b19ee176adcb06d4b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..635873a1f1598a3714c7b5009a05750ef3e26039 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df8cfefbcb7d17314d05930ee8fcd140b2235025 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c75f534138a80453eee56e52a5da632cc06835d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e73edcbea8ae601b1fdd24b8b0dea1d9a4222d1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4e662a56d1752251a61149728c59059c6591925 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c2556da1976fd1cd586ecc1a1d72be01fc84caf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..2af16190992c5f22249fd716fb3a029e0fba1aa8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0994cc2d715489ce4cca164cc2ae19902e251ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7933a77b34683a2f9d3d2557f1e13fd23f1c072 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6e525a18a939d63290ae9830a78b8f840ae5388 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03f70aa02dd866cffc52849bbe0771933557f10c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_management diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72ced798867ea4f86e9d7bf0717608be99da70c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..371e4b2176d9396677782c888a954a1acae2c1e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e693ab8b9f1a4c5e8d149704a0bdb102c7692916 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccafcb1e0e6f06576f38b0b4e38cba94fbc1abe7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16c19b29eb879b3fedb7f9f8411d9e9f918bb109 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6c00cb0e7cc9a906dd01a4c25e663d9cf09908e --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d6d242b0181a2b7039f5998d6a02108a90bf585 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ff2e08d4ae0c4291b663320d9ccfc918bf6f8ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37c6a892286f4ce10cb682e60829b525b33fe54d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ky_other_tasks +task: 
global_mmlu_full_ky_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b4fea0e5e3b23b9b3623e7be6fa79b98db69592 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a6ef0b531d1a185be293f35b82c18340e39577f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dce1b6d2d626b35d3c497e9a6e8289017348c39a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..168cae74453a423c475b4ed7f13f0fc2f7b9cda0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e24b8167a177e2a5cbae1dee58e34ec2eac20a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d1ad959349ad14484c27b28a419b3a1081c407a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36cd7e20620780639fe050aff5facfc45dbade27 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2a7791512f829437488c5fd8117acc96f75c618 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_virology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..563c1397689df77649e0a29c291ec3009d9d2210 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ky/utils.py b/lm_eval/tasks/global_mmlu/full/ky/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93929d42826f8cc455e1641cbcc9eff2bf6a0b19 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_lt +task: + - global_mmlu_full_lt_stem + - global_mmlu_full_lt_other + - global_mmlu_full_lt_social_sciences + - global_mmlu_full_lt_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48ad351f725275c8bafa0cf32b44cd3ccb2cf134 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_humanities +task: + - global_mmlu_full_lt_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f63c35ad2223dce8e1edfd1b4e6f4fd2539c9db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_other +task: + - global_mmlu_full_lt_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ababd6d14e6b2745ceac16953b93297bebc06c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_social_sciences +task: + - global_mmlu_full_lt_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a59e683357b1994872a4ee00e498cc413233496 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_lt_stem +task: + - global_mmlu_full_lt_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml b/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b925338059be1d355a867e55da838e169ea1d13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: lt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76b96844f696cc22614cf2643858d4d6c9417754 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..527c71074d96038c1414a9d24ac60c45b4dde78e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..419b89e30ed8d5100d83d56a2d0be71888386e65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c51daa226b39284c5bf96ec531dc90fb1430fbd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e023277406ff3c783c095e6996ac0c38211068bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..c6fea6f851074e422ab06a10afee17138ee8c607 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93b9a5610e16b589dd3e7e68f847490838e7f16d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d0dcfdd9ad41771c5840124f3688e328b35b076 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d33b747d8276d3baa23b7187aaa5e2bfc9f7939 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad74dbb15fcfdbc529e9e0440bf2122ee0aa9e8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c69754b32b3d7affd9a2313a8b2be9f40dbbaf2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d78f3a545da42447e590ef4f0156e6ae49f505d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e7b5e49e6e2028dd176336874c902ea68fccb5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d0085ab968a802e03eb40c8526d8d249d382ce6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..284dfe9e27a30ed0e529e80d004c3f273bb2b575 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e9a01030ee326f7a950704b589c8169beea6a67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec9a665b52d2a0c8aa1af6787eff4cb839c998ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d81a9470486926fd53c9cc4f9d2d8d8a8fc19eb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..139376cc770a8a0c321ff3241147afff31e5b164 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87112d8c77823c53196ae5f1ec70e5d43cd9cfee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2324bb288496581d56df81e035f2a0faf16e0040 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f365fab1d50d5cc33935198ab2f5e3d0a80d500 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3a6f9214771c7281150b390193cad51fe844c98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..526b68ed9051c96d53fe8c54f95c386173ccaacb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e14b1dcea2da09877e98827770e1b14fe7b55cb2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cdf5c90b9cde143640c51d349fb23fef3f0ada7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2f2a2103bf5df058fbbb5540b83ec9b5216a955 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd363709173087d90cfbb9e65cf1df80361f74af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aad65a13643885afa6d00742737983ff2376b4c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6dd6d699a1a2eee445ba12047e4687c86f7e8440 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5fb0ee1e513d860418cb68acb630cfc7ef1aa10d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75f2769ada0253604eec937509d7927532b0c377 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..beb27e9bd2471b6de27150e60d89df8fd6103d27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9d952c39b0a13ff0887fdb671535776babe9088 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..f77adf9b399d0dfbf28debccb427d95ab2467245 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_international_law diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6be84fde4ca8de4bc8d1795ad8ba6a223bb3d50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad597b27aa2717ae8aeec74c69a1595bfafe129a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb06a871b072676ed1e4d4e5c09faa9657346a22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1885ad3faa748e02b78cd1691e8cf4f752da223 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_management diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2dc83089094d69a5872cd596374c7ef9d3707ac2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_marketing diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b67d321e40986fd04d93a75073868e98c7b0998f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c744613c6e76867f77f901428b119071474c06d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09e6f044fcf2202fcc6d8278649996613662b261 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb8dd3307fa10a0c5628413331545e7773c4d245 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b1a85566793f56997423ae6063b75b44aa017d7 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aab1d556c86ddddcc47ec278c45c6dff556641ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac93dd6cc9c14abe297b765b04a71c8b51a44ab4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6be78ec390713a42935600ead6cdf9734c8f5cd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_lt_other_tasks +task: 
global_mmlu_full_lt_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60b6cdcc0b9016e13ed117af0ff9f38728f536cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd899676e6dfedcda15b8d30b048906b81983604 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd796e8b55c413a9c1937b92ad797174e8a0186b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c6e5f397293a0cda432923d9c9bb8d71b305af3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9eb9957d2df677f8d81aa8cd4123671cef29c53e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e17f95a66b8d28a0b68e6050241d6140988b7d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_sociology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d39bb63caec03b27a46a16c3094aacd15eb4d567 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8482a618bda42c8917f010a31bc259dbb4386e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_virology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a86af60dce82c39b95efc1da3e430334d30c1df6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/lt/utils.py b/lm_eval/tasks/global_mmlu/full/lt/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05b55948401eb1c8f4dfc9862a29170f3396d2a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_mg +task: + - global_mmlu_full_mg_stem + - global_mmlu_full_mg_other + - global_mmlu_full_mg_social_sciences + - global_mmlu_full_mg_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76b08f6c776b956f312b05f879ba0a278025607f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_humanities +task: + - global_mmlu_full_mg_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0006af4c995b0abfd5d34b87e363218b2a445c20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_other +task: + - global_mmlu_full_mg_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cfe4f5b02b6fb983c3ccb65f9a4345fa3b39222 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_social_sciences +task: + - global_mmlu_full_mg_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdc719d1076ec7081b6547d655887bfb6c81dbc0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_mg_stem +task: + - global_mmlu_full_mg_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml b/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..4aa97b27f3b6926ba42070c81c64b004e8f331ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: mg +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bea850ed14165bc4e32806c9828f2074b86ac36f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cf6c1169aa1379edd85737d0d4ac21875471679 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df582b2720bdcb16d660a8f4c8c1aa7bca0bf76d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a63513426992fc2a5a1a3004fab4fdea268a84e5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21003af5044b9535e107b4fa357c880dab55a8b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..d305ca94e852fcc68c1541a2f0b31174c7871dd9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ccaffb9dce404179a4660422a10a0dc9ab065a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..248f72c3df3a11dc885ea56bf87ba09a9b90fd0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb817aaeba681738f393148349190a197751d3a1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fdbee03bfff14a701eeebab940dcdebee55dd44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..493bdf87b557a384a093d779c2e43d736f3f793c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5054eb639c576647380db82e046d6c814c63212 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44a13a70eabbdb30b07de7844746c9af79e9bfe1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c5d029adc532558620e77d7f4f5c35205f783a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e5ece337159462750cd43a61f0f0ae0fcf8e5f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d62c7585ba2c757bd143f057ab7a6e59c92c03e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5dc67d05b6978c5515d857638f07fcd5ebc2696 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2712e9b4691d1c64c5a45713232d9cbcd63b0837 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c58957e03ccd6c85d76e1ddf32ef875bf2f4065c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..707b73564049e39a10ff7e0b87dcc128c36c1f2e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7afd5a7425e7161ff8c2b3d61f9abd77658ff9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6391ee4473db316dbd5685e3e94020648a4c6c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb7014a6b5480cc8123317379fcec9bda391c93f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74c5fc18df2caa5da286ec066052a1b73906c4fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24631ff318e3f6f7004a9977cd4831c06188e183 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9db4a0c2e13581833c88b96acb02bc02b3a7f65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f321b06ac63d32c971baec8863b9d91a91a73b08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc25971e4c69aba3ddc63673b8044410570fc429 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42cc39a8e78a32326c49315c3c9db5c25a23e820 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08cf8671208f5a4d3b94e8056cea9d35cc0446c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87314a572bd152b27a37cf4e9f68c73a62f3060e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c341a243e9ca66ba06137a4934fc700c53993ebc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15375f9f0756d185c588cfd92abd58811df57dea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21419b9b6dfcf1b8222d349a7acd254be0281a1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..9d481339372484a7e9605dc623dd51b924b79cf1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_international_law diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f083a0ab591d6faf45d026043bf7ca452d1643bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57e2e731820760c2aba37c5b1bea43f8dcb80ad2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7609a09f687cdf4f1791cb7973e5658739f06d31 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..becfe4b30db1ca6801bbb9a6c52f87d9c309d079 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_management diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3765002b8210565c124d39a09a3477447506cc62 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_marketing diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f023ccd4d3d9ad1348b8ee1737c86ce167f05e5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2993999d5a53f05e18e4be854c66eec9cbbcff1d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd430a0a171bbd9237ea960f8dc7b8c2e3c90b75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1b16e86512c0bc29969cc09f978d3888f4214d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab471f42cab9f636aa71bf8e238b9b4e8ac7623f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f598830eda5f37fff7080609b667950753e971ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..330f1f52aa90c715a3779255331265974ab6fac6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..694118d119b59f7270435d0a6ac0fbc258dca415 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_mg_other_tasks +task: 
global_mmlu_full_mg_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb6df92ac0f172469c43cda0fdbcb18cc7a2d650 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1de72b6b887ae1e9f081b13c7c00341d122cc2fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f922e1624b84e807143f7fc1bbb142710f338c93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c829b89dc10281bb38ac778493bfc57a156dddb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..362b4dbd13e1db423fb30a11b60720c10f37473f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0638cdb6d4535572c8a34a1c68ac25ea1c01e42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_sociology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ead541a433df13870d7f045b2564b4f0aa95f3f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ca09027f1cb3dc22cc1aa2bf8bebfa45a144276 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_virology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bb64d70115db2e1f79637c8f0928b615cfca71c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/mg/utils.py b/lm_eval/tasks/global_mmlu/full/mg/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5a136458b827f882656d67d2b2ebe0df4110351 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ms +task: + - global_mmlu_full_ms_stem + - global_mmlu_full_ms_other + - global_mmlu_full_ms_social_sciences + - global_mmlu_full_ms_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0641187b82aea23875e576e47e43ac55f0d7c2e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_humanities +task: + - global_mmlu_full_ms_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d14420c01090b328a4f6117a62746e659868ec1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_other +task: + - global_mmlu_full_ms_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3db339d79e64508a357a3de0dc62d9d5ae675a98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_social_sciences +task: + - global_mmlu_full_ms_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68908e16ac2d25395f72b6a24da64a39eedb0f0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_ms_stem +task: + - global_mmlu_full_ms_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml b/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba75026494233a4fde29e56c4de24b5d45669bc3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ms +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec791f2a08cca1bbcbe9d4f294e42aa93334a98f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35038beaff444dc1241589a79a1f1ed2b0f53557 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79fdcbdd0c9a02a1c11614a9785598c73a22b566 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffd6195a9817669d3e1a76b4eaba3ec7af2de5bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c69b82eb5f4b243e2b83d4257dcc0182a5d57bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..5821947946dc2f3bb6520331b5cb251a0fb46073 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35514b83405b51cb2870c012a443c2a11ed47983 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e242b8bc260cabeedc1cf857377e26e935e441e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07e107995767d4d7fd96ab4c53b08f40ca03d95d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82822217f4782d36a8d10afdb9b08dd856a05978 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be20fa6cfec6f4c913270e96882861cdc93967d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e886b501e59606923b487b90db6c8d74dc9b340 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a2fb6da5343d528d5a2262cf4307a662e073dc1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..efdffabf9e3ba98d2b110487cab97fab9c757cd0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80eba2e4795a06c026d2a54644eb5926dd9afcdd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e6caf2688b9efd8012918b8a1959c0f2974b52b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5914766232a2c7518e9517a3316d7fab569a752b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ac76cadf16c8d7feb8037ed42a50df02f7a2312 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6be8ccfe1a25131ec4199bf16f735135ab566c52 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f01c29b671350403a193361f4cf6c90a717b6451 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b18e8cf85be18f2ac7980c628af29b55b1554fd1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdb41802d75ffd0c1ef875a31c04da74112ad7f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4e44a60ef7a3d4f7ef7cae359c7f95934ab6286 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ebbfe6f4c20ed749b76b814b799926dddab4c46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f28f9a5ddbf9dde465b786c262aeb750f97dcb52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50a2552d0b7ee8f87cea9d4fceb9a8995505885d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6747cd9da0599822287dcc572764f8ccddb2c38a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aef3fee828e0a9f4944d9564df99a9d7281fb083 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e8641e97d5b1d62c26a0b8d1afb0aaccf132a4e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4aa7ba0057274449f3250892ba8e0b11a0ca711d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6d1faabf2ff4fb989a09da310a472c66b0613fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4caf7e54727487e23fdcb6b7b5067ffe6cf59545 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b2b5c5f4650a5ef51a387707bbaf3a40cfe7d01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ddef17a710db537488125b00ae751a99cb3494a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..61795f58976509acf5212696539476163145b7b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2e96706b7cd686ca2048dd36c44e2978a90a4f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d142bde66593b7b106258a37534c3a678e7cf1d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..947240560206e5e186079c1f0e3f8a8c5c53193f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ca04a13f3db9857da2dec99074052cccc5547f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_management diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec0e44626cbb32df8bd06783583d5a88285268bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f2b1eec7f0d0687e5bb730599968eed83864341 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65da952e266b53e4c67a9c9d9881026337dd767a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..399035f2c1567c14de08de33d331bb7cb62b03be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3bc74baa48b1b199a3cdfdf4ccff69fdd9246f3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..300de6771aad1aae4994c08dc77ffeb78c3fc1d9 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f6eceaee97e08e1620a305fc0c2c8eb6cfcc8db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c624fec5154688df3bfb017b79f48838e1c046f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a06e7f922796015310ea788ccd805a4f7ebbafa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ms_other_tasks +task: 
global_mmlu_full_ms_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3d5921a8ad8139a267cefa8d3b9850d0cbaceb2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d9a58b7b8f81b127803dbb0eb81ad41dceb5d50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f51baec9dbfbc1e5899f04f627d663ee1d88af4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c07cbdee8cb4a814d567d47b3cb20407ce95ba78 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..651cb72d4eb37961ebaca6a47ede956b727b596c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5aeb7efa4cd54a0267741924b853025065c8216e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ecbf57059919c16149449d97829f8a281acb5d61 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbdd5e256571de2874c574db059771d6ecc40da6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_virology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32b350294c8410005952d248c61d9a84375d8c21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ms/utils.py b/lm_eval/tasks/global_mmlu/full/ms/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec13a0be2c9dbeb3933d4d0a5f841dde583a06bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ne +task: + - global_mmlu_full_ne_stem + - global_mmlu_full_ne_other + - global_mmlu_full_ne_social_sciences + - global_mmlu_full_ne_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fef749dbfa7cd9fc848e010ef5c318b226b6f194 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_humanities +task: + - global_mmlu_full_ne_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d3dfbd466c3b8aefdbb14d324d0de1b3653cb34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_other +task: + - global_mmlu_full_ne_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1f09f00211510df0ac89d1f8cf28baa0eedf214 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_social_sciences +task: + - global_mmlu_full_ne_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eebc1cac3767ca67a10edb6c38fd70b5c54dc15a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_ne_stem +task: + - global_mmlu_full_ne_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml b/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..25f8daecd02bbb0414f665fce6ccda58f1cb0baf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ne +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48bf7bb188c221ebffcb2a729e5fc86649b7f05b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f66f8ec7803650ac5cff6a60dfc9bb9f8388fbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a02aaf30cd78e9cbb9d24aaf5f3787f8a6749a21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d87f5b985c6e8a7f91b0b6646a72d290e72a1112 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f27eb4e2d358ba86a0d7567f2affc416143989bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..d26edef8af4d7dc876a808a278ef2fe63d4ed0ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88b8bd86f4f4345ca2a250b3f12def25dd018aaf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51909ffc840297e9f9028e0086fc159eea82181e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40b9cb7963041a34533068f20110e20b575049cb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81f81f84437cc9c0dcf847b08d2969a9099554da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09798c096fa17206a0a96b9765ce2b4d05a74684 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49d89dd43577cf8931b0209d826e07fbda631c0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94bfec4a90dfffb4a44a71cc28061587bb1eb2ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81d6ed98e045976c34e0043e4b3c3026e958d695 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73ad1a34a39b3944ef2634c780527caa3a82d471 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cbc3bacd0839d6c0fbb8b86eb7501c1f92b303e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..225da2fb5bd475a35868879e26670145d6e0b880 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f5e9f1b4e447bb7658d73e7f48e19b16d55ff7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8c0436a44f598b83d8935e28ef61e8708415cdf --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..405661c7e2af99823f8352f098a24755c610e9a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6cff5ba64b352c0d7f391492e95d173e641144f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f7eb3ee33e2449dcafee69e07263f395fdd72b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0453e51d9d668942973887dc64ea15d0d93584a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05710100729043b043e6b6c4c8e87e418b168e36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd68d5f041e914b82b4cfc5df2b2ce0f21baa2a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39ef0a58f7fd865822aa293ec6becd3e74559809 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..535a391821a13f937d59174163b0a481758969ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f355dad253203b92e2f7903d12c8353178ab3232 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a52d7a017bf063a16cde6b3a65866785a65c4dbf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a2564200d4cdc0a0b0c5dd68ab4421861f1c06c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e1199b17fb77ff7bf7c7eef871c8fa2d43fe5ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..afc2135b37ddae395d6d4f0864f2ce42e23a535b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18450534654c637b06c56836a9eaaa44662718ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d23b8397bd04a04b2a3d50f750b39996cfdbbcb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..5be599d20c20a2b87ee2043abe7df0d07eca7326 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..180a397c62f976822662b1b3997b352b8bf09d23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3aa369a96354c77c7e6ee0fb2b9e57f99887d8b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e08abdac804b60935363639309106374cdaaef4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e44c5be63b6bec1011631761b40c32f93c4b775e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_management diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10f7daa274c1b821b33b73878f22a186c5629715 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8139b1f7a65c5ff5668d79e8d3f0b19cf1d866e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb1bf905061a29950ba1ed2b50678fc8b25cc489 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b74fb365022fb4eda448dc1fc77258780c49428 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91f8f06c5a769c701fdf7f7bbfd0955d9b837a05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..575f0e45506432d31b4c34c5d602b449a1e3d8f0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..95fdd0eb343fc2a738c900782c9ce37dcb8c429a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6e5c706c0b37d1d0a176f0d43e6fc41c8a47485 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..718cedee02328945a0e05bcb590ab2ba158af9f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ne_other_tasks +task: 
global_mmlu_full_ne_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89c70160a6bbbba493f5e3c702ea02ebd850f94e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a366e0c43a3ccadd06b6e645d3f11da62cb67486 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..649e53435a1e0842ed0280efd91ed9ed8c32c822 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37f2ddeab876d0c1861475dafb53c52cb20e673c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55f809041b2b726de52d63ebecec0a584f361317 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78161d5a113e867abf882a1824a6a6a8fa590d4b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c38f59c405afe4b39415a5ff16d6f4083a23be1a --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c15808fdc84a2c3b216833352c94731e83cb64d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_virology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c6163f12c9d3d90ffb4b9576eac1f809bb37e03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ne/utils.py b/lm_eval/tasks/global_mmlu/full/ne/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44f562da97b08002f4d5979dfae15180b3cbfa8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_nl +task: + - global_mmlu_full_nl_stem + - global_mmlu_full_nl_other + - global_mmlu_full_nl_social_sciences + - global_mmlu_full_nl_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..656a421bfb7dc29eebfe0013b7c69b331fe04c6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_humanities +task: + - global_mmlu_full_nl_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23a42201ec4f8212e836727b434804977111cf7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_other +task: + - global_mmlu_full_nl_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..afba5678d182bfcbcef3bf227a1800dfda12b535 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_social_sciences +task: + - global_mmlu_full_nl_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9658b13ed512182d510fd72c60cf1eb44eec5e6f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_nl_stem +task: + - global_mmlu_full_nl_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml b/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..39efbfd177533bc57bdb3c831385a5c386b084e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: nl +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..458a36140a17fca9a855c4c1c2852fba05f14b7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4cbd90ef070f7bc12149fb03f46c55d6cbbf591 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84cdf5781ba1c8e50b79069ca291270ab5f742f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f75776f24d231111aa39e224cfeb5e9700b3e253 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e963d0f0d93ae41918a86b2fb5cf63ef243f5cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..e4a3660b07b1681b9f31300e904391ddf57b04fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa9faed92b3f8c53a714cad2294eb6bd737ca57f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b603c309105a1873b5697c7852a15afcafa3d982 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f55207ea49e6588dbfdc36534ed59f514a3d35e1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5cdda1b7755bcfc2e080b9c5bdd924bdb561c189 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..26d70230fb8b945e0554ff7df68e0b4e5fb4e114 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..01a8a74742e44468220cf1a7f85aa68f1d53fa13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cccd266651a36b14d85ece24d3d639964bd7106d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22ad59bd462ceee61f12955852634723b02852a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3aca226fdff9fe7219463f5dcc2cc41bb4fa16c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2118a1d5d6935b9ba7603b393babba2a027536c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5fd86105df589b16c08f9c8abfdfc7eca6cbf712 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7147d517cfcfd02013b3afbf8ba0ec11e95d180 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..271b54f65e95485e1fdfb3b4e3d39d282f6c5f1b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..921abd1745e94ee09467c2d63f0575455fe1d750 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea190bea8face93cccc33b337750bb39707ac8f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c348d482a074bcc6dba6eddef8905a8df859cd8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de31a63b8df61eee7f08b5a96e283c7e5b93d400 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc0e3cb1bc79dc6b5400497c07ba789b72cec23e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e221c685fb2d15b7d9719e2e17d25ec44b70c1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..137158a690d175135c0ebf2d94f038ef54fbf615 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27b426c06f46f522fe70dccf7241f05b27af505a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..746df49e81b176db860a12d31781a06ae6e93448 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89cb42d9472a8e6517a6eb5de45312ecb826e02a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e27082c4cb8c320b8dab7e61d92db36c46cf1484 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66efc58c9673f3afb5129caa39cd06fe31e275e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83b653454773bf44e5a8fe8ad35125fa68b120de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82e00b4b8fa93e31a3307d40482e1dc628f6a16e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..468589da275b9236dbb72c6eaee686c5c9fdcdc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..e5bf62a98e7b3442f821bc3209d124e061e49052 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_international_law diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b53361381a6c63cf3eb4d3a0bdace7c7a29a10c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de862b6631b9749b0f6f343c99f35e7f8d2b6fc4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c205af001d210fdc47806f322b6771ca24f900c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b624af8bf29f0966c7d9d5e816c3fffb704f743 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_management diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81658e9f0278dc681e525ca52f3abd6bc8f5d657 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_marketing diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f8e52c0d4ec5d2231ea89155dde624887acbd6f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31af482ec8ca1a1bbd97cd82f07651b2a35b092d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..853de0c12fb9416a94ce8e8e1bd04e3a63d2ffa8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b86e04500b404fa145fe1bbaa70ac9e36bdc6d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96036dae3809fbdfe6187de08a48d666e96245c1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84e827dd19b3e6184f0868c7f26bd1e38eddc420 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f49c8a5e84786200acbb4f917b8a697f936aa1a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45484116ddcb28ead1fd09b72aa7c169faa5c0b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_nl_other_tasks +task: 
global_mmlu_full_nl_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17b28cd32c70f580fe1d6e80aa8dc82d6a4c6875 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4db01bc67184c4ece2b11d64a38f8d3ddc8b441 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be586b45d96457457c2a3ecc27cc8dbc06ce4f26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ffe584887a64f84ce5ec1a34c9a83e9c7b8f1bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6c76948b5866e83a1070fb091a3d2fd2df8b5ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..983e13cde755f5c09c4c6338bf463a1ffd89892a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_sociology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd6b6227bf6c656c75e4f7d35991761c92e72a19 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92d1973bb57dc8c7f2a57f8d0d14547b55229041 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_virology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8c2eccaa24955da352770a507f00e59fca5dfe6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/nl/utils.py b/lm_eval/tasks/global_mmlu/full/nl/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c325bf1d7123d0ec0d3e64470ba7a3b201a1aca6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ny +task: + - global_mmlu_full_ny_stem + - global_mmlu_full_ny_other + - global_mmlu_full_ny_social_sciences + - global_mmlu_full_ny_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89e7618fbea41e79405b24889b788bf59d268fc4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_humanities +task: + - global_mmlu_full_ny_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51b90446d1fd2362581c3ab9fb159e3fe44d8ab7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_other +task: + - global_mmlu_full_ny_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b711dfdf14029a040e965c36fdae79e0702c31e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_social_sciences +task: + - global_mmlu_full_ny_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99bf9d95df56a763b2bca25c05d20b1b4f2ba366 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_ny_stem +task: + - global_mmlu_full_ny_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml b/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..069a9446ec733541d3295268f7f6a943f13435a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ny +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e3d7c33b7cd893564177664390bc62554a38e08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60806afc61e6e35998b3c96d2631b01b66121923 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..afbcb482fb44bac4274140232adcb014dac97fd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f8981bd0280c4743b0461ee2b0d8587c8bc0b01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff44dd679f7e478f3663c7c8ad0aa1b37afdedef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..da5ce37080d7665fe15fcfc067f40dfdab2a37b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d62bce830bd1a21fe0d05839cffc53d17f938061 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48cd98d55bd649d2b3a94587b17ab59ad9d6f71f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed77ba9c18df83ce3a87f9e6d1aca93974d6f35e --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cd8aa2f43fd9f29c99d1afc2b722f613f801596 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66d5dc27b76d8d0e670244fa3d1ddf5003726b9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a9dae6225be59b56d6ae32e516846f574bcdd9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d160ffc658171e9aff349f431ba3c3270ecdbf9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88af709a618435ea07ae6acc136471a593bc7e91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d835f1e003fb6aabba6b787c322667351924a6f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..558ffd0bac03a9fab192fd1e7c581841acf8c4e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cce0df19215c36e911fbe4b8757851da6dc49029 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ce027a55f9e74dda90a9028259213afa97717c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a729008d55fc6da1ac8f681616cb881a13062a66 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79771bfb651ac6bce183fea43fac60366d28530d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6889806f2bf0e1cac4de62afec43278bbb9caad3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29e6e4a5a3cf39c64c0bda8413c91111d1238416 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..447db75fa9d2f3cf4401e576f824eab474235606 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e543cf7607c15f25b03b4bbf1f2e78b1a5842d02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61c49e750f738e195c4152b3d3d89995ed8139ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db228d02198cf9967dc51d9bb570eb70422f4924 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62d87c862e8a57c682b3af042f56af6219371a05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54c15d66e5659537fdd0aba7eed741dcd5fd538e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f7d8b5a1c730bdb1131db0e92a078061e0112c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f53235b878aec25fb7ed3608f8cb2c2ee55852f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d413b985b68b658efc8d4771f9771a153895dd0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4adf2e8b12c8d663e7941079d718ca04d7bd0ca1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9660b7b1308547c83102e1e7a9f6a603c4889f7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11a6f2d4f660c23029df81b57a7914f193b86098 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..9a46ff6a9c906e7538795d0494bb7368f2708315 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4606df53bddfad169bc678e41cc6e97447198cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6edade038f8ddbe8bb8dfc37b9be4ffdd620cca4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..765b2201444e56f44f6447627e8edd57dde558d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a699a70dbada19b4a2c32c8fc4d554c34a23b4eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_management diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..596d69375d64438f70093658783ae167ca236d0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fae66a7c5ecef3de82fef9fb280c3a3c2c3ca14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8555e173ab35f1179371bfd767cb28888651cb34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b64f4d9da4eddbfaa56c9318b1aa68ba17deb859 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c73f9f1af706ecde712b11fc5ddcd4792a2c919a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..456f4cb63edfc56346a4077b320ad77ff9c882cd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0e0e05edddd27e5260b4890837d380c2b1206a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d65c6be1e2469f956e658ee2e1ea4caa58ee09df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c152c80efd12264f17a001672b23329a0ae85379 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ny_other_tasks +task: 
global_mmlu_full_ny_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5e2c7b7c2bb6d95a5628da15188429bacb5df42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cacd5df735939ab579af56ac364e8c8f37f9a272 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffdd86d2222fc24e7c888c917a0025aeab952c0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e6b5ab89b3178ee8fa98f681df7a9005173e903 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f894fdd77c764d6894740d104bc340792bcfab97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d2d0cd4d9f6acf7b8fe7b18d7aa36fe8dc75f14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a72a237de4e1c8db883d5ac37e5d9d6e61bcf522 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9eeb7cf03856f104725d6dc1cebfcf0702d90dd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_virology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1c243c8305c2680a0d1eb0cadba5deb45dbfd6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ny/utils.py b/lm_eval/tasks/global_mmlu/full/ny/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2476fd33a57e70d07ba39473a6952faca127efdf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_pl +task: + - global_mmlu_full_pl_stem + - global_mmlu_full_pl_other + - global_mmlu_full_pl_social_sciences + - global_mmlu_full_pl_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b5f7aa4584b551d6d6da59020c38422490c0b35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_humanities +task: + - global_mmlu_full_pl_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..241dbc1cecdd7092d7f372559beb34e6e7019d1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_other +task: + - global_mmlu_full_pl_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a50a3152c08a75ba85d72b4b0b41e28500b1f0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_social_sciences +task: + - global_mmlu_full_pl_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d11c89f119e921cc4e2301edc46f489c88afa38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_pl_stem +task: + - global_mmlu_full_pl_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml b/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..af8809dc2792042ed3144271bbd88c89acc8211b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: pl +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37f611a1b7804755d15290516e3321155dcc655c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c274bce178e9f12885854dedc2246993e9d44156 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99220f0ddd1d36dc346d8b778ddc333fd1f36f0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..105926680ba0e802b515c0a5dcc096bffa4b51cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29a4fadc08d8af9e9471153fd98e3abef56fb5f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..cce1671cedb3a4163e6549ff2076f6493688a0f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79c6353082037914354e4ba4544d2960691d813d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb630140eb6a8601990708831e3ab712f35ea7dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b42f767eae21b00657ec6c1a6823b9c9e8a2677 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43bea9764df9264452f37bb547319b1bc7ebc918 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c9ea60146bd95eae494eecd34ee5f0c72f9ad47 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..365b60a3e397e58268dd400817ba860da723fdee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b9437e3fac8256e78d438cde65a037c9d9bfbf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..648f24c422edd8433c120f37bea33493020ff8e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..196de258dee0d6d30f75f5d06575bf45418382b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8646b6a393c825b2612389ebc55d102694a1f953 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d13d283c5f5556ceebbe8ea9e0e396a6155d93a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15bb640b93e0c3d28938631496197aa10c879161 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba96402838e5c30df22e46d46668f63633aab4e1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f142dd82f39e5d474d9bd1fa8d647fd1e2201f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99b3b9da11cc8bc221d2a60b10bdb89827e8c874 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e99b2fb93d648bcdd1977927e6eba407dfaf554c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc6113f78f15d8d51b79bafa64ea3b0a42154c41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05a7de9bfe7e12ea3a0eccb781e53b256de93d4e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aceda633f9b60feae440661fcbd31ac92f5bd490 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6eef2cd8578fdcf1ac0d8e12584b576adafab1ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5adb5fa19bc7b88102d43f3b034b39bd60f794df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbda792097d4738c0b6890d538dc56ae5944e6e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7eb09362a7c24dc436ca10eca7ddf603d8ae70ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7beef5b06ee6122943e89c7585778011d3a7089 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08f45dd9c748303c2c76ba46da529ca40ab562b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99664de8772599a1962da0318d6afef765876b0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d63f6f8d4af5ab8920d87c2faff92879cb639dbd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8080ca8d0ebcb5d7f503d16cffaa98633c763c03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..425695c1d5c64cd8b350c944e87a40027779a6dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_international_law diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6455bd71724395745a31dac3e9c5fbae81b4469 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1359b3a24e3ca52394fb0d524c87b3b7a80b067 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d7bb0dcc725fbb2e830772e759c44b0bc1f21a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f695226c1bdcc2a798f8595a06c12c529cfa269b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_management diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fedcd3f8817be8bf7be01c9d3157cb18200cb77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_marketing diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89da9f67565089e10cb852df563aa289afa0d455 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f34762c99cb9bbc6d273c016f4e74c91baa938b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25f201f4b6f16809a13a627027b1a0acfac21d46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd08e6e13257a3a14a50e49c52212baca4c1f19d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b61f1f17fccd40d430c7292e3337862cba73b76c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c1bf6dc13d80759fd4aedbc131bd5a1f7777989 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5329e1330ba57dfdc3799bf5459eca6b00484d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..514b04cd1167e1b455d256bd4bd0aee450a53ac9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_pl_other_tasks +task: 
global_mmlu_full_pl_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99c719f83c36178ac6aa873284d2c24742d6cc66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1dfafb254e809dc16186176bb61d07cccce71b45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b6181c0c91f62beeba5d6f9ef2aa561438a1f0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acf874dbf9630ff7b6c8118696cebbea93ea970b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d754904c4f04632e5a869e4a3478970ff9e0879a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4bc0fd8ff08ed950cfb842f368a0a9d8c7707b19 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_sociology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef719be1f48aab6516e2de1462b411d0eda8dddb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9084c13da02ba1c7cf3a203404497bbcddfb3c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_virology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..036d0f4c0a544f0abdbc7bf5f2611d7afb20cda9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/pl/utils.py b/lm_eval/tasks/global_mmlu/full/pl/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac79bda10fbf5f472c8cc611240c8126dbb89c8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_pt +task: + - global_mmlu_full_pt_stem + - global_mmlu_full_pt_other + - global_mmlu_full_pt_social_sciences + - global_mmlu_full_pt_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..261a70288e94b635e3f41e5a4b6b2ca58d971368 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_humanities +task: + - global_mmlu_full_pt_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a61b12f504501c34dcfa4a67eff3dedc5e88ec4e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_other +task: + - global_mmlu_full_pt_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c04bf5a4da9fa65ffebca5c3d042cd0412e841b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_social_sciences +task: + - global_mmlu_full_pt_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc3d36104348d3a60a420360c5c47df11d108c7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_pt_stem +task: + - global_mmlu_full_pt_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml b/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..66ba2417d166085f8b44cc257231a1e548166528 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: pt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9efd8170b1db9b7348ce46bc50033e0b5544805 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45390503d17e8403871c498f4e33e852aa3019eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90880cd077e253cafe68709633b533a02a8b0b51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f18ef2d8549b1ddcc64a0b3a9c18d94eb1116ba7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2999a02a756fa463d199050fdbd1cd579f747dfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..0cf0a61b55d9c7283755b4d7c1f61c66bbf2a95a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91d8cd2ed1e520094c8b3253cfd17df2c6f1dfc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68592aafe5b5b00c38bad52edde00f8535a32fde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31d7f6afda5cfafd2531f8eda65a13c676f498de --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46ec823241d6af4e85b37df2d4649260d4de5c66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cf6402dd6ec95592e34932a35ef8b67829a61d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0953a105a72a03a2f2109ad7bb47eba29995c26a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e6e91a9935da462b6f5bc101fecc2216f3a68b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67c29915c614a1d01661a72df1a56e27f6a54f6e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a6ba82e29b272d00fffd2374659d010ae8b0f9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d66a664c39f1b46ab6e77ae665e57236c334e75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..683d6dddcd054ffc1219e6db310714d70c74b04e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4396542a55f7f433bcc361a7dd27f3ef21e365e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89fefd1c32a73e2beb44d383632ea1e50b8ec63f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea323d8a64f0701fb95f14b306a175af2227bb13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f8f008235f7fdb1a851babb6935645144d24fcd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bef7a31683e7bd64839b1e67a47ab395fecd2507 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69c297805e5137c32371408199f7aed427ce273 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3fa920d7246a6e88c656bc3bce9270809f9d7ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b7ca2f4e826b4b3116b7d4f21e8f4de04e73d33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4713674dcbb43d0460556eb63e91c0ffd1bed583 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6475e995c6ef42e7dea9e14262ccbc8cdd6f0b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9eaed31adcac907953964d9cbfb46f563030a4a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d09e1eb97940084b3f35e4a212f9492c68f20143 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d8c1447faca7b6a41e644ef705c0ffa7b4acd88 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a883b438ef06e82e986f53d1e91c71757368c312 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ea1454e35beec1ba65d7235b7d984768b45e09d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34033c55b88be89f49997de2820f88eb689d2e2d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf961c33a549b15099087ec627745a5e273f11a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..5247fc9bd030a7c43b08c3146ef05738449bf953 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_international_law diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07e78da50b5eae4d39f36a9e39560826618f950f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2451399b8c66c2394729a43dbab2525b3d4d501 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79c577eac708a196c8015b8c0413f884a275b8e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a344b1c32eaa834f8dd4f84d08914cf71c8ea099 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_management diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eeff36b979929dd011e94c56c78d418d9ef9da55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_marketing diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27985380f5b9d2e9aed8b8c67965c0893d78c26b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2fa1da1c1838c94b0e4a3a12a75928d0e020514 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e83d186e2f9ee63dbce9e5559618f77443068d42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3529a15c3b0e6f06b2cc7c037965b0232b03111b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e51eefe02a33c8252f308a6cca66b7a17365af9f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec0826b95c63da87f3834a0f9dad7c744895e291 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..324dfe69ed57361ae56f48f16c72a431e86bc503 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..530c918efd7bf3055df23096dde71892282efafc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_pt_other_tasks +task: 
global_mmlu_full_pt_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7a3679c9845fa23a25dac7f39a1f504efdbc88b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f4cc0066a1f5679d3529802fd5a9e561828e733 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c5884c9f668eb9816d6670260447e57bcab3dc5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb2d65360ab95ef7c128afa27d2126dbe75070aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1af8d6621226920b8319bcedde1eee2d21b45b5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ef8fcb7972b983ccdb8ca46ccb3616b0371d2dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_sociology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b48f52873a61574601312edce12c244fb550dfd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b0de75322e453c0b3049c2fc42e952c3a8ec1a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_virology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..796485864c8e686a637f68bcecf3a32136368138 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/pt/utils.py b/lm_eval/tasks/global_mmlu/full/pt/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3aa5f4981a91025a3c994a95207e1c83b25e609 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ro +task: + - global_mmlu_full_ro_stem + - global_mmlu_full_ro_other + - global_mmlu_full_ro_social_sciences + - global_mmlu_full_ro_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d54268b0af5ffe2cbdd10d3295aff7176b6b4819 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_humanities +task: + - global_mmlu_full_ro_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e58aea9a5d51491b7f23b4976124abec6de75b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_other +task: + - global_mmlu_full_ro_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1cb84a91129f805f5391e442c84c2ae37967ebc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_social_sciences +task: + - global_mmlu_full_ro_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de0e406fac5194a846713eb63cfa172003515ac0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_ro_stem +task: + - global_mmlu_full_ro_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml b/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5cb6dd0ecc8d32e03797c70cd3414351e14f63d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ro +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c505fb8b204fc910e30e068462a1dd74111fffce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c13018cdb68e55e3f55e7deb4e8a69cad49c749 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f4caefba6f1651703a66f6ce51d5bd2768fe410 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c1387fd182fc7aa9fb098e2b9482724728fbea3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9e0dbb473cd251e83ad798d2a9698eda30fb5a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..5bf14ab0f5d8a36b07a06a5b6d635bae4b2c5e28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5903474436e9ea97e7fd86cb1e3988132e706fdb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6bb64c2e201d7722ad353f1db87e38f14b0a3baf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d719a5ef98b6aa7be608a6e100583c75ae874aaf --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9284a8f9cf6ba9c892be6251b77432f41db9fd1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d27d843707119cae640c69659f23da770ccb3ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d63556e36c5ccd350b1bf0a5419ff8a44554fca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25f30a368d61f9a6a240db0b657a262718ab43a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1fa6b5d23cd039f9b3e194620cd0c1dd84f5c7c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6eb4b6e4ca6ed74a7ec8a26afc90d63ced323e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e99772e27ec8de94e5ec2773ff8147c942a37c56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be99bd00764538247bd19da55a1c12a1b99a3344 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..819937e79b6b56eb78107ae1331df479c8c73f2d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7509581cae17e6a7800017be253b4617ddfab48 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d089583f6f3afdc5bd9b07a315c6cff8f42fd2cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46d5f4723ff084e157c09a2468ab28586bb6902d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a1ae7e72571b56dac54b7f4cac108ee35a3be9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92935be5c49c374198604ef6c3c4597c6e6178bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..efd2a03fa59de3e31e0e8aaed19868f9ebb6611b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe2f97d1f167d2c37e67f85bfff4d4c5c8b9177a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0432a01f1bb3511040ece9d4305953452d5a95d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..507fab8693c7962e681e58bcf64b76d8b0da8649 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19a76707ef5e11645afa82538a84c1a57f76c2b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d27fc262eeec0f83d51323082afd596b3e86e71f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f8023bcfc7dab66b4090240a6a73d95f117b1c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acc5fc418423e59fcb7e6afe369f69e701eb6d55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ea7c933656e09cfe84d071299b74af234c08e6e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b984c55b4a5550373eed5879c85bbb0f2b0d099 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2af2cbee1baa93b7b48d9011c5ca11d55404d80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..1cbf3d032bf7ecd8b5ad8174a1f91ea9653b9d5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0acaca0aa38f10f74ed13f77b9c02fd20cfefd5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c84234a0b147c90af4ce3c26c78d25eaf5183681 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09237c9ee8c619e6aaaad821688bea21e272ed1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcb3f4853e2834a7deb38157a44524eea12292c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_management diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33b486c04d1616d6688e820155a17c250f55a5f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09c3d5e92db5790b3d2c72a9768d5b83d8532c35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e744e1e7c49d2804a3ad927b6a6498f1b0834559 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e6d4ed7e76f0ba27550c8e43b6458ab1962623d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0e99149a67f6eaa37342782e26320ef049e5931 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..850262c1e1d1d51e1f07460f6862a3980142fcf5 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9dd2bf54ae7daf8a3357e9c015e578c56f876dea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2ecf40d787a97a3ba35ba6ed038ec280d59d878 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db259766768e6debc757d6756cc5f5704550d9f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ro_other_tasks +task: 
global_mmlu_full_ro_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1e439748e9cfd8cfd63f59c01acb4ba59cf3101 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0158c5453d28aa8be0e1740a39838fd139c4f23b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdd7ca7f2495b7b17dad3faba0f894cdd3b4068b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f7f0f514d3d508c193bb2bea4012613e7fdefe0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be9b334ec12c3ccbfc6b4f0ef34ddb6589a4168e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f37228bdebc9f50909fa636d97b1a037fb5deba1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aae05dc9859b160f31c62e5e21382d310be6150c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d789c204d0d105d92a6623ac7ea513b7966629c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_virology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40ff8228470d7438213ca4580f7551b94822e40c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ro/utils.py b/lm_eval/tasks/global_mmlu/full/ro/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc63cd34e0e65a8f909234f15cbbe967df557cbf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ru +task: + - global_mmlu_full_ru_stem + - global_mmlu_full_ru_other + - global_mmlu_full_ru_social_sciences + - global_mmlu_full_ru_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55422b437f69bbd56e9e6f0e246da26a1de3c10c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_humanities +task: + - global_mmlu_full_ru_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d47ccc605b8b6afbff1b7b470829ac1f0cd71937 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_other +task: + - global_mmlu_full_ru_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12d48428a2b51357a076a58ff14dd7e196523ead --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_social_sciences +task: + - global_mmlu_full_ru_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70ae3edb7ea0587dfafe60711dd9825de283b9a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_ru_stem +task: + - global_mmlu_full_ru_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml b/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b2f491bd4d73b02b679f4c4287555249498bd56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ru +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de158df873f4ec476e4ffc9d591a212043c3ef3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aab717e4f9841657565812844e819995e47c95a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d8d0e32e6cde61a7ac4894570d7a66f1b76ed9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2855ca36f8704568c90714b10868a5a0b3fba1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2efe082980f203413153c58337e5dc1d3012f258 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..96d00deb8741140940781e0491178eefa781b467 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a5aac35c603ce659be0f414ee24a5425544ffc4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd8bf28bd66a7187914b42ee43d9fa6901147af0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2e080c3d5de2d361c8c8e183bfda24752999f67 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70e8448ea1e2f067e672f6353be85ae066d530f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e6ecbcf28d5c4ba07dfa79aead47b46e82aa3e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f196351af89c409be27bd4e761bf32f1d5356b91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e623d78fd40ec7d03c05beabf433463834f4b3d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df35a1f608e4de41b4109aec60e216d5c60f4020 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82c49f89e5c68e437c3d7d8db6def0c08cd83283 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ed11c5faf895a5bff9b9e50933c14437917b7cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ebe62bf428abdfd9f252b9e1ca459f144862d66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27d6ad70067badc93a406bda2a06cd5d66d9ab55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7860e73e7424722dfcff244f2d64e4fa4547b568 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7596daa3b86087596bcacd192c4511067caeb80f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ecb64d52c5a45e8d8101e962416d22c9ee282e7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92feccc59a718f6414ae7598432eadeb4f449497 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f586f5005f7c57f26ae9b1ce33fc718291fb121 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ffc85dfe226463b2c6e310848f25de9aeb34743 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5da1320475996ed8fa9e4777d940be24617787db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc684975c906b9c8c9d3347672491e366bb140ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84887d18ae74fd049aee105968fe86a07fe6acda --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29ddf5bf3b34e4ed8ed6dce9b4d092e387d4f1d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0680bad6b72014110f5520c4a14405a3be9e316 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07ac341bf25ee3baf8f03f05239825e193f58dca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18e12bcd3f9aa77945f9ab99c9b09904e1237565 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c37522a22c9715dd437e61796b83d1e0ac4bd47d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cbd6bf329fdb61eba991cda68125a23bc7de9035 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8766c3489d8af3625e8a7ba4c1e2f8ce3080fad3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..4edbb98cfc336dab92e778c0da10a9a4580bd8d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24cea6320aa1dd60e9bff297b89b868fe8ca41a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3160fadc3521f556371ae8aa93a9f11a6758fa63 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8e480e68776c0581bee352a1efe3607c33768da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a7b77a14d2ce5f56881970c134f9e6c7c7dc0fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_management diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c71a4f2929b3a5ac670bf2df0683e81594055a0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac34ba2035918081ac852af72b41257c4a11617d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6049ccb1ff315ef186c707530bfc999cd265b56c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d974ccfaa60f480e0fe9c9ba09427994c751d3d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f05f7de9a9ae98daef400ebe3e72fe645a839266 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59cc8deed822b5d1c6e6b5a494adc5ce97179562 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb78b1f758f52c7597313000d322ed433043fc3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..685bb2a4da295c53d7a1a37c09181355b655409e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35c21255fea4db12ceec3c69e560b09ff0bfb4ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ru_other_tasks +task: 
global_mmlu_full_ru_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce70d0067ab770203c1cbcdccf68a250dea4711f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cce88d1dd17011bee9b15e4a1e1bf1272157225d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39fc895382665a7a4173d70d62de936c0b88a1b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dfd71cc390f8120749c4a1ab99f006b05095eb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd08ea345de134bbd88a0ac9e366a6d1c9a33a00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef616ee16a63c4c90989ab2b394164fde267389e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8244e6562ca0668c1f24e962dc6112ab5e2febd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f4df8105ee4b6c480ee01eb0bc599e6177d59d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_virology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06f71986d3c1113ea2ebd9d6e8bc97bc442d9506 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ru/utils.py b/lm_eval/tasks/global_mmlu/full/ru/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4deed570765b2e43db07b63b1eefa60caa3130d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_si +task: + - global_mmlu_full_si_stem + - global_mmlu_full_si_other + - global_mmlu_full_si_social_sciences + - global_mmlu_full_si_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b97994d0495b68b8d693e84d58e5c87d97799cbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_humanities +task: + - global_mmlu_full_si_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7600ca45413d90b9b4fab5fbe19118baf197e16 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_other +task: + - global_mmlu_full_si_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e2351a28a55ce2630d7f671ca3ce2f24c083168 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_social_sciences +task: + - global_mmlu_full_si_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8878bf808bcdc726c1f0fdc7a42997a142788c5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_si_stem +task: + - global_mmlu_full_si_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml b/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c775b20106a38ce2b9a42c91113dce664318e2e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: si +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b81c580338b6b4ac0da7547f4f6848bcb3c048bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32315245625bea5e6b01201d100c9c62847553a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7ab9539e670ad3b93b53034758bcf295b4be2fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8281fc42f5a688993b6ba454ce8043fb9a39874c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a7f5cf5b5e051f092dcb4c2595d177c4c24e832 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..e54148da1d227907620fa8cbf7f46beeb5d3f3f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b797ac60370a3efcaf6ca62b8a612e5a940d4771 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba69de35d301b43405008fcfeb04d180485ff27e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65ed942495ea5621c79a905d9f3a31713d5f2c97 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1418aa0dbab50f789977fb828d2b407d2bc01201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb32cd4fda7a0bc9c2996f8c648a3f4cb23fc55f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce5ab9b7f949470dd28358809aa88c0e3048bf05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2ab5718f2d7ca0972c6a5d46577f593df036344 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e764903b503c384c864b1c5b528598446274ea3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99679bb0ab3c3b1ae4007ba0ec37b3584a292396 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..553bc9bb7afa1e039e13dd460e20202527c3e0f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..112814b6c669a598fbaea0218ea63a03bb3c1947 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..008b55377da5099f0c6ab46bb4b4cea774548c32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fecd995a07e95406f4fb29070e1d385bea85163e --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d3018b606e900b786d855bc8d486894802aac4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e80a1f2c4c946b23dadd292e73cb37761b2555e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10e15738479a5be1d7020b3d6a3be005d5eb3ad4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12d90b97f710ef2d2691931b2670792d431b101e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d285c2c6a813c62ee53f4c5df13d315b099ee1ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c85f2dff989cde83531f7ad470651f094e209f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b292fa50a63c5e02b6bd9d73d04e9f6fdce4b90c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ada74f5f2d54e62798ee77e5ffa8b5dfc094da43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84bbda2898f8832fb3e14292abf78f4717cc82ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c378798ca925fa44693a0c5340347913676b9c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13758f22fb986c3e3db008f711ad5d1c2ae64af5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fe85e14ad380875216348762842e4fa0e8c6c68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8afaa392bcd373da763d49609ca14d963f8f5501 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cf69a6894d4889ab4a54b63fc20149ee8846d01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..418927d5ba316d75bdeef7cad8eaf1e5ce0313f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..de0a611da103b2f45c7175a81eafaeffd41f8b93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_international_law diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10212173664e47f7297fecbbd2ca6904e0882a7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d31372ada4d999fa9240a0ca1f8b45ec16ddd507 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e3d0e7c87c416910e86d7bd8cf59095f408bed0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4e29c9ae4d6b7f7677e049f221fde9832ed1eae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_management diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dff414a16e6a5927ffc00d9b203515d9ea60433 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_marketing diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6160f02b4b9fb375cecbc13e330aba0772065f61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de1db6c981aef6da775160b3e2734106990cae92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d48cf75c9333e3472447bc7a205e61e7c061e397 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d08b811f33ab7b1f649e8ff37e2f1a614cd6147 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3163db497fd90e3229709c90d4ea44d7d340ef08 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f809bdddffdf4b4274703e58fe7964cf427aa63b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..964e6ab7b1d24be8c872e4bf914dc981f1c5db9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c04e0bbc337dbe6c191bf4a8c9239b2883e67c91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_si_other_tasks +task: 
global_mmlu_full_si_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6542f14e34c0bb181ee423984cab769009b05a7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..384489798648bbcd476d3389f266552f343d50b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80f36885b740754b3daf6ab884473713200c62ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ac5169ed493c36faeeee1bc54734869491fd6a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21423506a8ec993e4f99773332251bc3d386906f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c86ee0a346dee2d26edcc0eed25c164e5a355c32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_sociology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28c238e65078413dfcfdd1fe629b177772414c7e --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a193546072886aab9584e583a2a8caa6514b3952 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_virology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..424c23c284c7df240f9e50ce6b95573c627b87a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/si/utils.py b/lm_eval/tasks/global_mmlu/full/si/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98ced9873144a24a27c534e689fa7d5fcbd6286e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sn +task: + - global_mmlu_full_sn_stem + - global_mmlu_full_sn_other + - global_mmlu_full_sn_social_sciences + - global_mmlu_full_sn_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69690862098b99934a3a6e85bd365b783c230219 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_humanities +task: + - global_mmlu_full_sn_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18e750b6291313947dfd819a88bf26893ed31801 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_other +task: + - global_mmlu_full_sn_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8e762154e8c4e6b978d306c29124d6f682cf4bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_social_sciences +task: + - global_mmlu_full_sn_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3136233b403abde2ee87f794edc49c97edae5ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_sn_stem +task: + - global_mmlu_full_sn_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml b/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..30d50ba0c935327da78026e4056a8fb9cb0389ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4de495e9abfd59ec52b42a3697931f079937327 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ef227aa5f15f72002312f64a8239ae85637981b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8662ab9645bd99bf373f765af1ae1c00f4f09b96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f4741c04674ae249c9692210288c7f1182a39c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7477170ef5546c00cfbcb87c77e9bbaeb97d9db9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..6d0ec277caa41d916bda6ea04f25722c08e05ff0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f0c4f42643cc5538cd187f098d1108a6f143653 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8651ee192a5024abe1ded9effe39bc4a8dcb8e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1d1a98e0ff81247eacb8e1a82d035ac3fa6f1a3 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9ce08f307e45ee2da6b4ef44a43ae03ce9a89e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae34a82aa856d75c5ec740c2575a39ccf10e5877 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b41c175ec6555a7277892d9acd598dba0e8e817 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5aaa8a782d361a1a777ac2f1755e2fd7360f84c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8606e96c2959fa67458306f1301efc25cd7e54a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c57f7038f6319d5af8bf84817d2f26421db5b41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ed5b400c42a3ce553cdb51631041da0e67152ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55dafc2bf4a6b0bd2d70b864f0eedd141c2cd8c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b8ee96f09618fa6f2fbd3ba4aea6ecffaacacc3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2597a7d75e9149fdba41fa09d8654db5dee697a6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e6be4e6115788e02ce217d961420d6feca650ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..446da912c7dd1e8bcf96f3d0c0b21647050191b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd8cf61c4f32a26c8a8ec9c11c24a64a9442c3fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e178adfd6e82f6018026a2f282c42af949b8635 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ac4efda6b728e8b874ec8415a0975526ab7279a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23ca0b41766564f80dfacb764131c8d962a7d400 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0bd9be1952012c77911a642bed92692728807198 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..916e14caeb648a2fc2257e1508426576f1859fa9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6a3e60c2c54c2f6d92fc2713f4053259862df94 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62a197c0eba0a301c435c9ed68e1f108c3716022 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..815cb60b16d7848b05b40c6ca08b871ec483fc26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff9f970e08d1206140a9e7726114c3a001edfa99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2dedc38d7e72ca888b3551b0ac305fe8b3cd2a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ef139307183fe54eef709b0f9740a0f9e1f5747 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a52c2ded23646b6c837a6ef9df49288e09eaa81b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..648c3dea0f54f303d76bdb2ff66216675813611d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca63c411a7199755284772a8fc6e451e168b2291 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d74a7f18eca74b91c10147d9bb3dff3f775dd9db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db272b3b4784e40004eddd0aa250559b7730353f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db3bee4d44a962c6f5eac646b936d8d47c2d0947 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_management diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a700c4e7c59ce9f47749d0ba5b2c6fc8b66d44ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b826b187e34201af1e719e556062f2cd1f99a466 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dea895aa9bb6a543368b589a320c44d018b4f59a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b641f6b2afbb637d3fc41280003b62c196df6405 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2951a953c7a42b7c95e7682abb9ded91456a0f69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9816d8b480449f7be8af8c1d57ad45715e9ad447 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ea1050502fe65602c7449e16c395c33fabd927e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e941437beb64ddb49832cd3883f6feb003acbb9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..057a197dbf9033fdc09890a2848cb5414c259c0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sn_other_tasks +task: 
global_mmlu_full_sn_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72c9fac75d480eb4c0428f9b505f12918088e02f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e727b3cd510142625bd0a44c3c399bcfb0dc5f9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..341322d2c83c20120ed86f0dfb0ca0c28940dba5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5448baa47394fd79fd7aa472cfc1c155069da575 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..542c709af46185561d0785a4f58f0ecb484004a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2913db53bde1f7289e7b40ddb08c01790ad8a84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad4768478d16b0fccf2e88dfce772908b7cf993c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..254fedb4f7855067d1d5c95446821f4ed89e20ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_virology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2aef6dfddb867d78365e083fa0b53835b146aaf8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sn/utils.py b/lm_eval/tasks/global_mmlu/full/sn/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml new file mode 100644 index 0000000000000000000000000000000000000000..014a412131f5e586e6c949ca386249c454bc5835 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_so +task: + - global_mmlu_full_so_stem + - global_mmlu_full_so_other + - global_mmlu_full_so_social_sciences + - global_mmlu_full_so_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff78bfaba252a3db2b0c3c028440fedf971e92b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_humanities +task: + - global_mmlu_full_so_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eec8e66138735a6abcec79f10a5888282d25bd40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_other +task: + - global_mmlu_full_so_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d00ea1f54b691cb797244e9941dbeab2d42fd9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_social_sciences +task: + - global_mmlu_full_so_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..497b9b01e352d37ad1a5fd52757130d19933810e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_so_stem +task: + - global_mmlu_full_so_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml b/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb052a633605159b0271f62d9ed512d447daa0c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: so +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..afb5d9084c83935d297c6e120634c9fbce9705b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79f3446df384b530c631438e174bd608e077a610 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54a2faa0913b44a6896c461538f6e92c67727393 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65bc598cb2a4a5af6c008f1dc9684e8e66f5d448 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..224aa39b24795ce983e3319b3bcf3c708fa50d96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..758d22c37d04b323028098313c84dad1a9d6f716 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35c224308b993c3330d8dc5963264be1ae2ad73c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86428ae8238ec533e73bd0fb73cdd48c527a744d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9957a2349fba24fb2c0fc9eec0d4fedf687dbf9 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f51a1b123d8a47d5246cd8d3f8d72f6ddc47320a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43388d6c8d35e62defc815b972bd4ad17e2ca6c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a5563308f90a715c3216218691a957cad65fd7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97dfa147b60925a7f249f774cc40c7185bd7be28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9792659fac1ee2ff448715a5491de7204efecb93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ed44e41f83996cb93084fbc4d3ba68cb73e34f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76628481f9bc0a4ddbc18018f0d57c9671c71140 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b7645c8e4e52090be2305b8c726dd197e1408e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa75e6664fc2609971850c75df2c5f0f8a0534e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3ad29d4c23a2e0940832f9876a7aa8bd014b801 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..274af23bfdcec5f12fd72bab441e36ba30917325 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6bce30d68a11677821ff5996f696d263e0f5eae6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfc44f08d6cfc94eb23c6c771cd5634684fae266 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55479c399128600b0f78b28088ab4846c25478db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ceb5a7014e29946b3ef4ceee91a092c78d44d556 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c403ec513ec5f52a27910ff80fc079005a8da6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8089bdf87c8b494a4657c15400ab1787ab1cd59 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32cacffea668b3cd783bbc3f05d8445009dd9c3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd2c35ac8eeae6ba49aa768df65c516914b04e78 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..26f2cb3c43fc9d84809dd05ce071bab3280abe8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..730075b1c1f1afc011e4ed7997cf0d4111ad51c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9702a661bdeb4535b3f793b00031f957764c688 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78a21d5d972b73e3eee5b33fbc0482506c7e04a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c95b5562a636c817a5b9e84fce5c1116fad9875c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..632778d3b276ba032f36fd553ff1d0f86c211661 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..2d5ab1c53452229d9af58e781357d2f206baf4cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_international_law diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1372a1d17cff381788856f6cf5382305a727ad0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19a1120eb85f9e72ec856ed88a3eb9389de21088 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1e13ddade10f3e6b90630eefffe85e3f5c8a506 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e325205640a7355d66d25b7aeb3f0bbe7d5eabe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_management diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b1c002fe5a8a6a05ea9970e752fa5b9e21f1e64 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_marketing diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0136dc694187c5bf3732c150eea327f14a80fe0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b8a33ba92fbd59ffd6ae31ab87e82c42952ccb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1bd0011b8bc226e01e5bbd52eae40d09583f131 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60418a65f9377ac7aaa3d9056c75e996dd5c783a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5aa40241a4b85d9d7420843cf34464a95d21b5c5 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..421a98016b83cdd1794302bb2e25f1daa5e25787 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..721bfbf22d876a9e6f17d41bb05e55782d442995 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ca0c5c9bcb3216c7b301a53ece3c53e4e47e1fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_so_other_tasks +task: 
global_mmlu_full_so_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f57b5949641358183bd1f21cd5dd0277ecb36d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7d6408e291220d4e221132cb16dbf948253520c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a03de5bb99427c9b189597be049277b5614de156 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7af81e6ae2b4e2d0b78d31706cfe663b0690418 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b52ee25901816e8015d4a83d458295f675582383 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f3847e67646ebe82f894e56ff7ff882bf764c13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_sociology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6017167aafd4e39a0a247494fd2c578d9da8b46 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2dc85b324be8c45c7b59a01d1cd537a207656651 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_virology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ca99e5b4eeefbea4f282e76411f7ef7c260d038 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/so/utils.py b/lm_eval/tasks/global_mmlu/full/so/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e322d980a1c2778df8688c371413c19dc4195528 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sr +task: + - global_mmlu_full_sr_stem + - global_mmlu_full_sr_other + - global_mmlu_full_sr_social_sciences + - global_mmlu_full_sr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..080bc545cb3f9e0d6ac97f8d7004fc763c507ae8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_humanities +task: + - global_mmlu_full_sr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f0735eb4439896f0cc29aabcd23c57c0a90aa4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_other +task: + - global_mmlu_full_sr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdc29d1f608c6ace5e8c7475dd1c6c27547d5cc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_social_sciences +task: + - global_mmlu_full_sr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c4aa636944898c99dd4fcf7e42035a978932eef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_sr_stem +task: + - global_mmlu_full_sr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml b/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..6af61b3b28d14235a72fa8b548da96c7f73c5a49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3275870e203bca8e21570f6cef1996b412c0e9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5689af7369b640521929adde0bbbdac8e64478ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d23a4383f0e30c1b700d077fe7753f4db44310c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e89f5e61be0e2f111aa5a2a490fbf18a2e570756 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5611c15c469aa2e4b0471bfa4b79893f9f88f21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..9e28c3034aad065087ebb8993bbffab306f9b532 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1eac952c9e44e97a7678c648c5b917502c9ba185 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1146aa117a6b8049b105f8b65a2fed6fc374fd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bcfda2ba34dd2c8b7c7b391eec67ac3a6bcc7cfd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3beb5b26c1d4fa89b5e5b91e7f451baf016976b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f959a02fcafa5915d1e9c826c956662f2be8b393 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e8761e0d3836bcda9a1f1f4f0fd5ca828ff775d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9325f6de74b826969db631ca40120e821f7cf1a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc4a5bcc3cc759d0df3339a9a558ccf31cf12553 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3a5a78b53d9cd2918383a6e8385ce8c6dc30504 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50f60166c482fd5721e5ef8002f5be8d2b40f5d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bdd854f1ab22207209541dadb87085cbcf9f0c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88862d212b1969fc4f02a0bc875b705db9f6e14f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f2b2952b6706e4e29f6367da93081db27338cf2 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b89deb12e19d7581fc914e84be334b90a666985 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55fd7e8e662ef66d2892dd16ab1372ae9f47028d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..946acf0ee2a3b5defe39e075158bcb7326ccfb7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0705897153d5d0b16df90781ca0eb75a2ed2cc78 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9721c9ba99fbcf90e560beba130c4c7ed55e234 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fedea95a6bac0ee70fb0f2f76eddf37af23c8ef1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dca9e1402fe5a89d4363f955db72b8a2efcc1ed7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b01276f650319b6ea97b1d63b9c6d46e931e0a64 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f549f8ac2d25f066495e773c2c28b46621cc7fd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6b31eeeda41b4d6bff48d23940b452887f45d59 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12d0f0e5c76c2e0b8c013e2e2d648a845dd3dbfe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98c401001b4dc83725084429c694cd5dddddaf8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76e6b45c8624d95c6e9eb80b17d4ddeff2ac2bc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0ff1d95afdecd3c47ab8f84bd771631141d5874 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73a30099c741f49d3d5fa3ea1897e9e9e4999aae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..0aea0826f6db54bce4a14354baa668c1097c7853 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..debe604fed0588bcdff83d7d1b40b4763cf01dac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..407417f3ea3e2583e01150b5d730c8128cf990c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..513a7f8775ab9ef422e32782169ff829d32c9950 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fca9de044cfa215d365d6edf7a18c0e5d41dc192 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_management diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8267563e63768d3d500c19a6741057e238cd0ef9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ba860f2daba6111a78a08d811766891c985a913 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ecdbcea91d15ecb8612d53870fe16ae82c486834 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54bf349162b1417c8f0a09981a719fd09c23bbf9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2eab8d4a06dad83bcb05acc978f8ea6367f122ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83e1b84c46f4aee1eee31061b08b58aa8770a1b7 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..654ee86b925b233e1ef284aa834f6a1968cb29fa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a2f944b7bba7f8d342a3a1bd20601fd2d5034e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..648ae0cb03e7f6a77aa29767a784a748b3d7b738 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sr_other_tasks +task: 
global_mmlu_full_sr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ee8a831bcf754d7d0f5dd005c5dec2f08761017 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b14211541c392ed442de13c5ff78c467f44dcfe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19e2dc544db339c6ce6224aa2062f4f5a4464930 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..043024c0dcd90635962ee2b4f03d360c80440932 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24720925a41855ebe53c1d25abb4f510d322722b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc93c5e07a46d3a54c9575ac7d51e618b8165e9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b338dd6b4034b6f47ac59e2dab09215f28161dd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b07588ad15476540ecbbd018e013fe3e7de1e969 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_virology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f78403edfd7b801675daf8ee10225adc82a114c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sr/utils.py b/lm_eval/tasks/global_mmlu/full/sr/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9b0dc1b0e96c8f0e2551fe4a31b730ff3b881bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sv +task: + - global_mmlu_full_sv_stem + - global_mmlu_full_sv_other + - global_mmlu_full_sv_social_sciences + - global_mmlu_full_sv_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f8b4628f754f5a8ba65b147882160704072bb562 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_humanities +task: + - global_mmlu_full_sv_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b29ca130b28e39600d70f62ecf40cdc479ca6ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_other +task: + - global_mmlu_full_sv_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c4a813e8ceaf1782775bee7eff3e770ce07cad0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_social_sciences +task: + - global_mmlu_full_sv_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6fd88f1efeea969d2dbd28d1aa842a92a7b413e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_sv_stem +task: + - global_mmlu_full_sv_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml b/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b9fdea988da74442edba95ebc9f78226588f203 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sv +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8329302f55bfc6f86e6db0360698baf191cb3235 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac9fa560ea78ab74f23107bef64503d6b5d26c05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..096e0e8f04e8bbe558b173fcb955c1944facf124 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ced0b051fb4b37d78daa7756b6b73e708ddb572a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a88871b4efae57ab7ca198ee0d23510a4cae5c07 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..c2462c179f1bd149b94a45205266924741e65197 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae3fecd44f725570ec275f5cff43bfbd8494bc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3f00b24915c4a2bcc84513e01f4b6dc8d997e76 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71f613d4a7d5fe72ba2e740a8f7e4111971b076e --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46f4c6eaa2efdcc411b57317e6e4aa72c8003966 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06906bfd32e6e785c173305d13c272fa07c4fbce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1013ef3008caf86e62f11ef8bddc73b8bb36386c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6a752f0ae863adf73b2f0dc2b4135af73e17d3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..547365f63bec67b0383ea7877bb89e8897118d42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74086a15a5be1c07a06732ac8a835980b9b3e0d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d1f4847c2f7551b9bea3422bc33dd68e67af02a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b78b5846b90af87110e424fb6b58c37b7cf810f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd2056294b1ecab4b0c9f192402a98f66adb855f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc6ebf2fd8e0e84a1558b492d51a5bf17cc1162d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03773a83b86832425da35e3e0caad4d6ce290091 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3db653a9e175335012b189e8c88c425b1558409 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a087557a8375f0a4019faa80137b1d4b77884d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..638553848b693ef8b8748686587f8cee537b9a43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e62f26f0e8018c11d9e14568999441eeb7add15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b686a26e66388951b274be574fa0e19970f82e6a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..177165383cf0042cec3134a21eb4fd634988164f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9817c173e56964e24e44a9219934a0f124bc147 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61359149c5df8d3a56cd6efb600de7cf2a7b1b51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce3aa9e208d35250594547e3715e6efbd8bbe088 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f705f8e19439cd36ff0000463465b300d24442c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..765cdf6033fb2690f0116c3c62a60a27408c3350 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de7b30b4ea8cce8a4db811d3b09f865df448b5e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20969051b83299571780a0ad14da9f25e0fd5c15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8bd5fab3cb03985b49a370120ca30145a6a33b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..7e5ddb57ea3a13fb8458a0c022fdd13aa7fd3194 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff161d5f8966e56878c109b43e0071d973ca9d8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1602c9080495901a48463178f4dcdc07fa17cbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f011063fa668db2436819569546b544d6a42765 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ff7b8736be311252d1518f6afe1b16e19b4e379 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_management diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0e669f715bdadce6a32e4e1fc9caadbda44296f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83e524457a12ea576ea2b27d346da963f4792c2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f179879204c6ae29679e8c621a3a53645d634442 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f03ac0905f2d468d5dda67b1e40f3abb77652ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe7f58d23f7b10ff6e37ce5252bd27e887d38b98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79207a8778257e76da9807c17342ee22ae8f89a4 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae533079dbfb8cff7990659c578c3d8727cd6ff5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c602c4fe434411f7c9d54d3320de84866812406 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebdef8a89134d3c6f7b59d052ae5ada15e822a45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sv_other_tasks +task: 
global_mmlu_full_sv_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3645c38a3c17733b3973402b895e7dcec371af84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d40f577df995fbf77861b82593640a72505926e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..edf831060cff4867616f85c84db86138879d4224 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f897662c221c58626723a6a210fdd8d473d15efa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ad4fb5ccca32ba3006460ea7bda2cebe8281051 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b869606048121292efb01118ccde1cb13387a06 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..522778def3e070d8b2e1a05ecea4cedf77cca154 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b3cbc8ddd76608a902cd6602fe94504fc1d5c18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_virology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d7df52b09b2fb43f64c54fa1c45949f8dd0ac5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sv/utils.py b/lm_eval/tasks/global_mmlu/full/sv/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml new file mode 100644 index 0000000000000000000000000000000000000000..274543cf056c6a4829a2d01bf76c25585566967d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sw +task: + - global_mmlu_full_sw_stem + - global_mmlu_full_sw_other + - global_mmlu_full_sw_social_sciences + - global_mmlu_full_sw_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02168dff1bc799a208fef8be525788b0b2f7b10d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_humanities +task: + - global_mmlu_full_sw_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fa28a162527a99f56785aba4cea362fedc5bf48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_other +task: + - global_mmlu_full_sw_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad31844234018225e5debcb135d42a76b5ac15f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_social_sciences +task: + - global_mmlu_full_sw_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f23cae88e4d93382b83171bcbebe6590fb79657 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_sw_stem +task: + - global_mmlu_full_sw_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml b/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..58cf53220fea417fb87ebf1d4b43a8b4c1242922 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sw +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..187229fb56e6ae19611cea4c7079c790627566bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d0d4c5c59ed00554e2ffa4590552692986b7be7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0639b390525d4ced14337cf79660949b7662e235 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a729c9da06b3121f2b8e90d56e8499b19cb0636d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6b83623fbbde49a899ff3bedc1862b5e95f302b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..1856b934331b60a4e5b3a0be88aff52db31dfba3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ad547ff8d1deaf7a83e328656caa6d4c3e739f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff8d874199ba390086adf1a621a39095837e8ef6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02f53a4a1d1d8fc8dcf42f3c0da9e4a04e6fc253 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9f4cc6cb3ed871171d24660627f5211623818c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bcca5b3f91f2bfd38966bf697698c6d774eb9242 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..434d2faa5df8bc81027255a292dd54afcb698a93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c1c9d41e9b2495490e671f3bec8de893cac8353 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a907de64a9bdddbd2314e6a6982ed76c4971c1d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ae86a7c22d3a492558316da54ff85c3c7e30e6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05871f2506d86094895549efe9ddc4c2c0d01521 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d0de4076560cb09521d67e3a7046e8b172b6bc3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29bec055e9ee26367934bfd1b9c6810823cf2f2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e49866a9fd8095d432a0ecf8e76076b1a3d100e --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7adbd97813163ad8cc9c16683c5d74eb33b388a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e65ab5ab146c34bcb320d6856b08b30f7068549 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7352ad72e045f64481b102b5a66f3ce1b103992c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..797932baed3c4a8fe590166205d8cdbd839146a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..602d71ffa9f91315b41dfa7692e012a36d32fd8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a91dd829b9d2b95c06b3e2f71a5b567293904adb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c19b28da67c861bca14cb1c17212859edf3449dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a9c63bd08dee2dcc355e61521571994b439daa0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..239eac65f512d431484ce0f4855b1d591f434f86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4f19d84b37e3ac4ee4c2acd5a12ef7bc96f32d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5725af6378bbc42534a427e152f8181b44e3d5a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d0803407b33d3e4eb301dcd6f05bb1ed2a5f753 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfe5a9e7deacb57b19b2f24b8c1eaa4d7267632a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba20e932f12656fce0202f37ade43ffe64e7a6b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4609bea09b55a4bc77a342165b92796454654470 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..bbf616b1de69f0ff77b51f233b6b85c5ce7d7ff0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6781f2d521f0dc319f7787e98af1298f574b8bf1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f862917fb66741ab80323344d887a8ba48b69d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9eb51cfbcf588a249a71d3ad23287b30730c2391 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b0e9e67337c2fca6691c66a6731d34410eadbf4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_management diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb65e87ebc6d552982d19b70e2635771e2ec55bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10d4db0d87c9bab0d6d0ce6760ae4e2b24010753 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b337d0ab8f133bea427f4085e0d413e01e6eabef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f44bfa0d9db24fb91020b0989eaa9ffd679ee4e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eabd5a916f1858cd7c2c6c76fe5acd7c0cc34504 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41c6445800ea3d53696ffbe3b3434b423f836618 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96edac9941a503dd29255f3d57047bc630b4a142 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db94a2ff4287a908026e9bccf17273e5b2b8ecd5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cd19d35dc189953db225d5ace390fa65f9cba9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sw_other_tasks +task: 
global_mmlu_full_sw_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9434ae4c22f8213987379f93f014a1f36a41d173 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf35b9c644fa3a50292cfd5c6a998a68b277e5af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7570e28899b60f7dd0c8a29542f28cdc802e986f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54c094db40c4f586ec3f11cba73f466954a47e46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8d5a42c00d3bc9084dac75ab79a932c7159e521 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79d51a58d088f82ccb5c7b58dce7e73654ddf3a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..523b1572bffef3758a6910a19c713687321255a7 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43179ff8432de7e09c640a2f9e2c238f7fd74968 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_virology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bef7b7f84ff14e8618af1580bf5c36f06198fcbb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sw/utils.py b/lm_eval/tasks/global_mmlu/full/sw/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ef0f7ab57f7dcbd31ec0a471091c0788bcd9f65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_te +task: + - global_mmlu_full_te_stem + - global_mmlu_full_te_other + - global_mmlu_full_te_social_sciences + - global_mmlu_full_te_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a3c479e8cd65d5f82c1c83348b3734c75fdc5c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_humanities +task: + - global_mmlu_full_te_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2932844acb80e0985e189581313d0b0e38a90688 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_other +task: + - global_mmlu_full_te_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25e721db7e6c9a47d1863266bf44755e7d4edac3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_social_sciences +task: + - global_mmlu_full_te_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe2426cac5db970f59b9ad681009a105b70de435 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_te_stem +task: + - global_mmlu_full_te_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml b/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7b1190d48984e97737ebdbc8e1746107f6f2da1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: te +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e922fd08d1826ceac13c9a4eb536a0f5f26443e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00582018a182c2aba17feb4bdf62b598c6b631db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bc5e76ef4abf6f8296da1c3612505a9e036f83e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b440102c86fc00b4f8d8e3a5286a3f8392e172f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90e5618441a6f0284c7d1b86186c076528c717f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..0f036e60d50f8e4c2d98b032ac3082cd5e7ec9ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccdb849a875c73c2030beb2cbd0d70f0586c76ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f11e5657ba687bca3548e0f1b0e4dbae7e79f2eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5022ce2981b9612ce5583b4d23fbd1a57a0a6e7 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd5219f073e7105a9177f562378d5bbca12070c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88dad05a2d9eacbef66427bc6e206a385da99204 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e8f37fc1dee6cdf5a64d3f52408342d8b15d4fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f05276252961898828e55ceaef16b767b1052a9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf008a6748dfe708ba6bcf35e700a81579751db5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97169e931a07143e4d1bf31abc353f5d24db506c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3edc89662958185b6737382b6c76fc0f814c7b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4c182d1d071d10898010a47da30b10f75ad40eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53b52f4d3ff79686c1018b6d8706685710316d02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f02170ff8d048cb3502e01966e6e77c3ae41f25 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c77d30aa2dda080c2314cda0ffe5e081276e7faf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f388a0606812de534f7496e7803954cecdd5036 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75d54d72bac17a0522dabbf2a7aff9830e275e16 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..383596ff024a193f5a94346128eb9cb0e6144a53 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8db56a85168086d0de6160f13880b6f88eb96e2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd471b8d2e4cf4b3d992a03680c51f20bd817f0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58f577ed1206932c53832a5b8a3f4bc8f139a126 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..400a3805f815dcb48835dfb5b2c8fce407c26350 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..694ddc304f0632d3b5b0403ae370694503ef440b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b900af19ccfe1aa095845f5a5c52db1678c9f9fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3492e7249129132ce1f659e3631c43eec49fc7e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48a2d75a6714e41b9f0466edb89caa4c409e3d94 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e95f7ea19dd033356bac2d6385e3903c3fc71db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc44c1b0a1f1c616248f3b41da15ff2a645fddfb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7631419b702a0492a4c93b912b213ff3e879cc3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..0c2c7862fbb8689e3ae5cdf3675b25b70ca395d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_international_law diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..718cd9fad0c2a1b5d966e4a0b4650ada3c3ddf71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bb9170ce69de22eece49f97c7cf65d2a6394d51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..123555383b3e423607fb92a85dddb5fe66a28547 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f092416ffbd3bf7b46a8e31ded9971e2c88de79e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_management diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15b84b4678fc5868c5e41f18ca00af67fda8dfed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_marketing diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f0730be97277f9e1d94caa47a5915dc8236a8c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53487f558acb7e033c2ef7a2e7a71d362256a10b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fca8df9b7e1b62b0a6133511912754e4f72927cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d87f6b029e595bb6a81d110b69e6432b985176f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9348a76e2949054375aee37c3804225072ecf503 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8efe8d97b27ea77899c165997d641229742ef5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b702542ea997902dc6fbb8b83257e3fd034d3da7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..045b6e1ca5ac2fe20c04eaa51d335cee15f27a1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_te_other_tasks +task: 
global_mmlu_full_te_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e5fa30806275cc5c615d58537fe7470e16adf45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4ede33f00c22cf9cf0363f15d2b6f742e5ef5eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb1906d415899f46a26ecb76e36faae1d9e82615 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ac09ce0d6fcfe9ec0d38bcf616aa97532410da3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bbb7bc7c81a4be3d388675c964b9f21319ed1427 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e080e0823b88d4d6ce06e007dddc376f2d50a621 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_sociology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..338f080950aa736aac2d7b1ff63d990dd4a467fa --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f5e38a9eb80b76a0e48c6528aae8844dac25a04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_virology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4da26e3e17e0370a9f15543ebcbc31bca0384e7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/te/utils.py b/lm_eval/tasks/global_mmlu/full/te/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8cd3d3f3a8ef18969408e79f26eb9cf62a0574af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_tr +task: + - global_mmlu_full_tr_stem + - global_mmlu_full_tr_other + - global_mmlu_full_tr_social_sciences + - global_mmlu_full_tr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4dade158eb2ba0b672ed66d09b205db8cb97a67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_humanities +task: + - global_mmlu_full_tr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e80a5b9d8e30e0482db4d433181e2d3153c3b89f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_other +task: + - global_mmlu_full_tr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56fc20e14c019a09a3da4196c37fc0d4c6ab69d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_social_sciences +task: + - global_mmlu_full_tr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51f9bb3d90e89ec77e86d89d22e9468687e0b08f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_tr_stem +task: + - global_mmlu_full_tr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml b/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..e322bee67158028cfafaf5f066b2b06c5bf189d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: tr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e8215730fe3086fd17da2c6d3705f3df94c18cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..444402254c31c5d20abca3a14610143a493a543f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e85390bf1b17894295e767c8f780190cb3cdc286 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b1afc9cf248c4896286603c7eaf40e7b5edf23c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdfa69e6dc4ac181408c1803a777c619f551de00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..df43a67ce3c40d244893a9f723665da467bfc762 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af2b8b3e808a26211d054ffc36bb66bd61d54011 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..622854f4d1723fe8e9acea54cb61a2ae505f055a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..902bd9c1b4f8629f687057c0a3001912c3d375d9 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b44d0d131520d179f9897eb08eeadce7483d6bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27540d97f0f5e8629f49353977a2b37fea566096 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbcabeed0e45164a58dea395c6c662da5f6b2b5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..628a4fcf089c8b34623b684ea201d0e28bd9571f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6feb236f98053fe44b87c1d45976d58fda64c777 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a2a8665cf779c170519fb6a1c7aabb38f6444ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffc6dee79e3f7acde296c2a68ee3420b915f9a7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77c189a0b016cdf5c7bb63469f71dd108fb6e622 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a756d102e745702cc33e49169713a1bb242ed452 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51e7dd9e92f19f1c849b839f30fa78522254ed14 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..077476ae73e836b89cb02508f488ee6b5479fb48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb60e0425f02bbd1ef8dfb26efdddff3c2b489e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b989e05875ad7f8d6f00c3310a1b6a7185fc85d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a0c4d90049dbdd211e3bc6e261e573b67ca54a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a585f02d02ba4fca070b527b5b657ddf2c8a8f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f88e98310ef3623c581a6586d4d2c4d6ab83a765 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e880b0b53d293fe4c081a59d58c5aa9a05fcee14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5527bed2cbbb08445df6ad5981b1fb1651d60f0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da93a96e1dc65b23b7c784c95a84c54e3ac68575 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a28e110c3292adc977f8e13941ec391b718f81e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93871dcf8d6be01702b51095d327480d1138368b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..507a4d5ce35588f9e2ca5288c178ae5c723f0cef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60cc713ef73b9301dde1c5916a85e5c1906d1236 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e48bf12280d7a77c5426616a80a46e4f20f448e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84a958509c079c10c0c7e269d3cf971ba8902987 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..d0dc429f01794d584b3d7ebdba96a6e9551cb724 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea3b7a5158ead9ded2196f661459537db7db7a79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd61d7d741942887ae8ddce5cdae383d8043c3c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0e785c33123646da2c94dd62444305d7b625850 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ce0d753b00251e19fdff128273dab558bec7f7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_management diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ffd4986f72dfef6928f01151d6bccbdf359580a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43814b40c614703757b39bf6992f9e53b27a12d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e21cfcf6170cc7a7d2e1f8d1ab4d77ac6fa9634e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88fbfbe2ccb5d2d5526c15f46ded4366f7a5d8fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f92f855dc7ef6e29bbe11749c622b65db1d35b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31b39c38608bd004ef7bd2964035ec854cc0a257 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..283a2b8968965630dc401c9d912806f0086f6b58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4c170145f91260b2271a0ebbe9cc0843f571612 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c69f14f76e8e39a690734f265f05a54d18d33e8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_tr_other_tasks +task: 
global_mmlu_full_tr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f5e97c69d33697fc1f6eb2c9036a43173abbeb2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00a5f32aa33a74526256172b0999037c86ece8cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8571bdb0583b29aa696debafafe4d2ba473da37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..539f8da66b7f6ad9bdb4e75ba746477b492de91e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4203e36597dbf5712b8d7e8804697173256f8a20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cf6352c428dd6fb5d316e46c3d97cfe3db5a16d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_sociology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b86a699b6832e790f89f1cbe6bb563d5a3d09b8c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..001cbb288858cd2e34c2ecad4d99811735fe4890 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_virology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f1d4e4f156e3a43f128d649df6ce99201a3a022 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/tr/utils.py b/lm_eval/tasks/global_mmlu/full/tr/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e880be3296d78472d339dc0e7677fa6402c6354c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_uk +task: + - global_mmlu_full_uk_stem + - global_mmlu_full_uk_other + - global_mmlu_full_uk_social_sciences + - global_mmlu_full_uk_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3ec01db662d19e88fd639bcfac012aa559c4f3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_humanities +task: + - global_mmlu_full_uk_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..176b18617e3b55fda06f9edeb57c4237f3b2d465 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_other +task: + - global_mmlu_full_uk_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66b36a60860741d505ec4166590d92b484addba1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_social_sciences +task: + - global_mmlu_full_uk_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4deba6571d2e63b1b7a52dfbc8c17a6cf05e39c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_uk_stem +task: + - global_mmlu_full_uk_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml b/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..5765ce13a8ea0abca99c5f3d10a466956986b3be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: uk +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce37c7153b358121247314c8a5523b98ab96eeb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db1433d5cdfbfe1636a8249d7c86d18a28a75c92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b123ecedf82a4e8b116b4e1d8426db8e2311e14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..775d2f2c485a36f44dfcceade1349a9d54bcd70e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f71076dfd6b51d841c44a4a450c20c1635c0301 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..92342ac7c46e7a5aee5c74222570bdc250ce453c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71384a8b59e22094067a5b9d8b218266347bc851 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6013afe174e9dab8138b6cd653aa7aac98c37337 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27b604915237b4df47792595be16cdf87dc6c32b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87131c250cda56368b1d4595313a9785bdac3b3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93109632ccd54e93fad1d9dacfd7e9e6a185a905 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f11fcce9a8323adf61d6ca0643b3f1bfe701cd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ff9715a0f218bc8cf71a7859db862c8e197e6d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba92e4b8c750a2024e87fccfcc28de4a54bb344e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a1c86ffd86c74ab468774ebd01a9a58f617f4a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d80cce7c444e66b70af8c3f96bfa6abbea8c80d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f8a40912591a252a67cf28ddaa681e862d4b593 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebd6c2da9942405d628ff522630a0198659306fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8b0cf3a819ae6df7a2907b2c2524d92e844de03 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..010dbec34f80dca3dd32a6f346f8f2e11cd19773 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a2701440c005fa4ddfc722b3940aba25b7ae121 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52e80017c6a35cdf66a3d612bd6b36c094d8bf17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f41dd3d8b4d19ea2010abfaf86d472bc1a614d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72c589ef3c96aef46afa349542cf864dcb704331 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e70675d9af32b086dd01a66028253e74ef6dd39f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e29c558ea152102cb364877f2c66f968cde29589 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b7354950affa0e6adb65d4033d7d6d171dcbcad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69a03c065d67601ceb8096d844eeee429550f5c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b02711c212e701d91dfdb63974cb0240f1c6269 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60cc0cdda683fbde92229b53b97ebb2e911fd8e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b62244ebbbfa71e0ae8464e547b5f57e712c9313 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57667edc254ffa8c3b3406f30d2fb891e18377f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..028048904505c81ad20590187f6733538cd7e594 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37382bab0038d6a1b29f6484cce4900470589257 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..d1b046d79cd94474e30b842e71860d04b70e45ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_international_law diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12b9da5239e828dcd43a539c508ceda038002bd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abb2de2a05e759655b60e1b9577e3626197c2437 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a1a6f34978827b021e901c7b72e6ea27122d1fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec4cb17d3064d4ff3af32334094945d11b0670cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_management diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..afbdaee2e028e4011a26da315de8c7d3eccb774f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_marketing diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc1fe1bd199203861224ee853cef1e575b79adb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f3b18f833d085ffd8b48939433e970b388b18b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34b54e34d0f47c22539808a50c2a7aa044403c23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38706977649f0c27b555b6e4887d9b24ca7ef19d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f9dd1fe8b62a4dfddade286b74f88ca192023db --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e9810083eccde3b43f2a00df4fbd3e2dc4b2992 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08e3c2af263ecceb300a9f33cde94ea653b0b65a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc02a7b2600f9d736f49e80bb87f9ab8a9d0f027 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_uk_other_tasks +task: 
global_mmlu_full_uk_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7090a6e1d86783c40c01b0128f73bcb29cf63d1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b43dcfbc70e0cd14292003d7d1b5f61cdd97e1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b279a94c4a561588539de397fb880407cf22e050 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b45dc621ba49ba94880829374c9881d3fbd3bcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ea308da21fc68715e6fa7f6b38e2adba20c547e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7aa08ec041e1faac157d3d686267e68ee4fe1c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_sociology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d089e77838514827f7e062de041e112e080099f3 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41b627f5a1fb910a51519de3bafb2409e061ae7e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_virology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5d6d4151e5d1f7aa7b0ade57c888865df4ca8e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/uk/utils.py b/lm_eval/tasks/global_mmlu/full/uk/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6413b35900bc41c7575950bf864729384eb58f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_vi +task: + - global_mmlu_full_vi_stem + - global_mmlu_full_vi_other + - global_mmlu_full_vi_social_sciences + - global_mmlu_full_vi_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a05accaf43fc09f9ab484049e5dc77f64a9e9f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_humanities +task: + - global_mmlu_full_vi_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..880bab9a3ba2ab786ef6489b3fc60d41d4f4eea3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_other +task: + - global_mmlu_full_vi_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6da224f2a0a4db0ed65af52ac4282f7fa41120a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_social_sciences +task: + - global_mmlu_full_vi_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12526ce7a15b5c02b79925b0c89f2c0bbc9ecb5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_vi_stem +task: + - global_mmlu_full_vi_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml b/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a0ca81751e7eaaa78e349f2a4764870a7984768 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: vi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47dc80cee15f1aa97ab49861e68f8d5a498f87b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d29cb5836b46e19ed2bbebfbd05f401e55981ffa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e3ba1dc2c6f69bbbbf30e8fdb02a0fdc9c88300 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3afecdc13996982b2a46c8dfd76583101368feb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34a90a8edec01226b73e9dba20f4f47edc04ff4b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..63a4c772641b6f99ede003b18fe17654aa1edc18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7226e020505a05e1a2a0c595835762198ee89ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90a9e0b2c36f6ed0d3689d23f9322a6a02531966 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a09173d6ad778de596aa464ed87d587820244b8c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22dc78bdf926614f460832ed79ac4b4f38351af4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6f8dbca3b04ea56b0841e1094423bdc6f6691aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d4b3d605fe5b3d4fed95add8207e75ed108de26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c501d0a3addc3293c9c896ab3bcdce6b5260e11 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0936b3be7e0c07df5fa454e790e91b2443a5757 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b23387f8f872149fc95b68cf4a20e7c539bc4fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c09826634d38488a073aefa59deac43bc9e37ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21a28bb4bda7edda1318f3b07765b6f6c7ac1a96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a912dba1ba51e7054c59a220581b3658e41c43b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e334fb1c9df66b65b35d6f707ba5588a1a26c341 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba98297eb3846b0ebc5d9308a601ba4d89ea3eb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22e0b00a3eb94bf1ca3f854282c90816457e502e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06507b7ccccbc9ea567d832cd612702646bf18ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6eeec7ad17f7131ff7d017b88a27caca51e268b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2faf2b09e256fb3a21ee080dccd307fac737d69e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16ed50b8c79fb2fec977084c3b862fac8cc1ae8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cad75ecb6cdfd47e64864657451ae0c9c83d516 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4499711fda825f3aae6c70abd0ae5a048cb112fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb92f446d6345e7df44291b4d1fccd589f07b163 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a12e4debebefda50729ed3302c62ee41c8b5d2d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae34e4de18fde6cc7dd1c63542409c403ab4659 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ad96b12da739b85cd5a59a1e400d403072224d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5df3661c7176e254381ecff370b195370ef7bc35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57820fab48fa914ba683990f6b8bc66ce896ed9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b53962b1354a3467b7df3cab38b1193fad7d62d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..5f81b09e5bde001c714f25d3ffc059901cb4099a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_international_law diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52ec47d4fc75c827612fb61d7df981a6a340301e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed89994d5ac60fd1f1ab98a812ae1924fe5e67bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..258bd8c462d1eb4a947ff3b8d514d46bb2c6412b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1bd2f606e088fbfdc421b6a6909658d1ad6b7a7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_management diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..951a36422420f7d229f97b7105908722a0ee0f23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_marketing diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d6060070171868fa9c0531db68bb39ade3c7893 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0cae1b03d6d0b34ffb85fadf77d2689e0ce2c08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07987487cf71c50ce6081cb2da22c7eab8d0915f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a852bc65b82316f7f26e09658c40e1b2cf73156 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42b198f3f17690131b9d876f2454070d5e08d82b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7ffc31613159614ccdf0921cf808bdd196135d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..963496749c60124491b3465eeb0278eedc6ab7b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da949e34f219cc1acd6b5b7a6a30c9c6159e71c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_vi_other_tasks +task: 
global_mmlu_full_vi_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81c74535e20f83e5eeab83b9412dbce07e6b0473 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7315b35353d98f77fa9507997f587d7510e5e212 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2eb16528320e8da3a4e680f01cc3182eb57f331 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12933f08e0004e269019e5a423914b6b48e6ecad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e90ba55470a49b20b3d90db0116d395fba9d59a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..056c757b8288226ec5d0d683b403f389e8b2944f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_sociology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bcd95d6162d92e6d6d8d7800c5f89623355e8a9 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..775b0cca8ddd581b43d04057c4fc39ea59e96cf3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_virology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db6ba6e02682aa894db8a1f77528eba609813816 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/vi/utils.py b/lm_eval/tasks/global_mmlu/full/vi/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba9f2460b1c1f6c49df0d91c951057c6c19ee081 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_yo +task: + - global_mmlu_full_yo_stem + - global_mmlu_full_yo_other + - global_mmlu_full_yo_social_sciences + - global_mmlu_full_yo_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e3b3c119091fe9232626186469b3cffaeb308a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_humanities +task: + - global_mmlu_full_yo_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed81bdfec55a90fa017c11850ebeb51e7058c970 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_other +task: + - global_mmlu_full_yo_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bab52fa22ac84fe7157b4027c86732f5e4d15c23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_social_sciences +task: + - global_mmlu_full_yo_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3687d569e9243d3ce45d662e45c97ea83c3aef0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_yo_stem +task: + - global_mmlu_full_yo_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml b/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..ceefadf5b13c04af262f3ec7cbabcf5e5dcaef58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: yo +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef817a386a77acac262b31575a15d737d85da090 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3bae5d55846ecf3f69599084b8e610233105d92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b39aa143c866aaafd07f9d0d5ca06dc7ef36266a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..588329820c3a9767063e57c63a0b47526e144ff0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21dcf8421b0938cc1e87f8c961ac4b67a9f57b76 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..f3abaf24c686ee6e13a635044ef62b9c919eeda6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0468634b099bac3e0aa9e86a3294fe0839915e20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df6e5844e7e96556d2683a5fe1b6a1cee056c960 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0542a4fe21ad3d91126287fe187e9788846c2860 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cce0b497d7c42f8e6c5090dc970f417e1b0f2a83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84ca1413e609719b95155921f913b623e081bdc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..001689e934d2ac260f1cc99af241ec86c5f4d21f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dcff962c39f1ea58d2f9801bc7d530c9c32a6f23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d055d6d5d84e0138ef0c6d5e26f24ba3ef46198 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c21f7f0220ecfa1f17559e620db85bad7e601899 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b6173f0d1ed576e3e83df8acd96aa5ff5c25a71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ffc9740c51e41201bf3e08095c6dbfc4fe42e8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..394a143ac372eb79debb4563cd8aabcf266ed6ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0de18874d4dd74e783688f1b463acca588f8b98 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02b16faec059089214d6ed9afff85bec522fe2e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94733faa00ce0a4d64fa224127d043cd65ced627 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ec4070e8e086f68714c743426a69f0b6d4769e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ab051d9b2178da41229d933d28e339ac41af040 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bedf7f204873c9cebdae6f6acb00f4bbfc80e03c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb4867094d9ad787892c1b0dfb45c05074238fff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cea21a893c6fdb26def6f68c3528c72a63177d30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8eae6cdde52619df8084eca39e89209fd66bc2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdaca54fd1f2bd6989adef5f30f6a2cb08e96c48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef3d75271e5f2438315d1678cabcd5b8cdc58052 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ec62db01f62cc47d12566beb06500ca56dfa9fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30c8573c4f8e397f934e3b13d56e0c2036df69d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52f91d43bfaf7af5c95393f34dbf8468e8948f90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ab0ec2b9ba72b1ed9175ed96f663a60cb977fd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f510c2d1058c90734f1c5d79a6818ba7922a1f90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..9b657110abc7b2754ab10ce098ba0975c9b91e56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_international_law diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3ac0a521fa21804d313a28dfb91ffb30bdc3bf3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7a9e71860cb9b6eab9af1d0ca653a7d04883269 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a61d3ae9fa1e2f5d2542fb1c6dfb92dfb1a3b1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92b0b526de32184043682d990d6d612abbc1ab43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_management diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74c17559dbf18b6afe5e0e22da74e7f6c55eef9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_marketing diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfc2c8cbcc33ca978e2f20cd8deacd40c23cf6e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad12bde64e609adcf43a18057dd13dd95d79ffd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e85331c907b1ee1fa21beaebdae22ada5a5df58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a6a6fc6db60a49f2a3228fb4ade06b4f25ab0f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62d9ae7bbc09fc99ba608e4b6d8ae0b520ffe5ee --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de42ec7a8f8655372f5ce2efece4105f854cd0e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2ad3236230f03f7395a195d5cb9a95f6dbff054 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..198f227b893a3758d463ac50d867e6ee3355b76c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_yo_other_tasks +task: 
global_mmlu_full_yo_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5942f74b5f036208a319abd9b37b937b2c02980 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..efd4ab7df34d4475735fafa7876901ecfd429cff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1956c87cdf7676dad84c956e5d32ace13c39841 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c6c2b8c1c5444066af0541751816b9198caa307 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a12c4abd3e44cc1e7b256efc95bdcf59ec2a6308 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5747900bca8d2a1c00ae902353f5d1938e03617 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_sociology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..493dda39641e29de5d42257e4837dd196aeed92d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..420b1b01c8bbe069eb3b397b29e8a3ffef620a21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_virology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0964b30717ccf68ee13a8be218f52cdbf56d078 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/yo/utils.py b/lm_eval/tasks/global_mmlu/full/yo/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..098ec0979efba32b317192ec59e747a148c91234 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_zh +task: + - global_mmlu_full_zh_stem + - global_mmlu_full_zh_other + - global_mmlu_full_zh_social_sciences + - global_mmlu_full_zh_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb347da8f853d37744910ef81a62421128e50cea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_humanities +task: + - global_mmlu_full_zh_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98d4ed5e613fee710e2f3fe210c4da2abddd7e9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_other +task: + - global_mmlu_full_zh_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..235012e6c715579dade6820e7173e6fbb69a7f47 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_social_sciences +task: + - global_mmlu_full_zh_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..660486a4f41437936635eb61999a13c457bc2ab7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml @@ -0,0 +1,8 @@ 
+group: global_mmlu_full_zh_stem +task: + - global_mmlu_full_zh_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml b/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c83d4957bae4fa07bf63d4a177331fd2954c37c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: zh +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42ea6276b627a8738f2133e30a509dec2f25c28f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45001d14cd72fefba40209a38ee044003b421b91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_anatomy +tag: 
global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37183dc793c238e307ebf03cec66000336f50a45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bbb5ea382e41273deec7c6df1840c3336c652a30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d90ee0ea4fe299091b204e411d9a0ef1593fc0d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..ba2031feff45c3c7db26a8e30207cdff0bc81a1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..860761b4f0079f95ca4646ad8c111d1542863a9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53d01965f9d5cbbbeea2ccc9278809854b100d24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbd2e4be875c004d729554a69a02b39671df8e50 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..523d6b3075fda6eb41abb160dad2c9a746fa9599 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a08214f7fe96bedab8f3b1bb55fbb161b711119 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99332b356f6e0fc4dc70ac228b6d8c7af51c3117 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_computer_security 
+tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b042cc8bf0231106ee6ecc283863ff8f577f2aab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf920112179d6d81df9c20512beb31a3422eaba4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b30acad7621b2c8339eaa16c3bdb55fb6b9929a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b108c421370e7f86d5aa9bb44a087f029e1aa6f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..647755991d84de30207ae9010f1a89f2ebc7d4a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07d390aab2f0c30f68df1b6df744404902cb5d14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28b2bdaa54cc1ab9dcad563983a4ab84abb25460 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d084034da2abd8306485999ff67596a618c742f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6232ef607ecfcd8922518d269909d0a1a1b9dff4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70e3e52bc9c834410672b8d2b534e35c4f4470b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# 
Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe6cb91391b8721579e1a9a1d0cfd4340d6fcd1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfa7213a4b5e4f614c3cee431a3626875eb102a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca0b7ad86f6facaca944eb360f7f5f6ed3d5c0f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38868e969bf5d308b624dd6f4015605405aa9951 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b79237d2f04a713f51bf1ec59bf5b6d7f0b5179d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6355da2f8e7196062ed7eda14ae58449daab18e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function 
utils.process_high_school_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f223886737a2157807ca44f74c717d6955d3f66a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9aac20977c68333b736f60f548ef4ca506f04960 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47d8355f5bfe5d7905efd9e0b8f3c80babc88700 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_us_history 
diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1f6671f2d183423812f1053d1e1052fc4a9aea0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6941ff7b2f110c7ea63fc623622f967802c3c8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee228b22d38c2c25a361f745ebdbafcbdfe58743 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..07b1ebd16f4b5cfca32969742b2691c6ee3839f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_international_law diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab10ffac5daa7950533669e07708f827099ee429 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..451260b55f483511bd543a401881c29c3a9e6815 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..508d14f64d8f5fe198f36550d697f91100297c14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9db0b32bb1f162c4d84b2a967465d1de9dd3fb17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_management diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7142ce49f5cb9f0deeae7e4b1d10a291c364220 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_marketing diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..220530907d9c9f952163216967fe7545dde8a072 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b479c9b0cfd4160c92a4a71aae143789fc9c283 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58d13a9944ad81bef3c761b55cede58c60571368 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..95d91dfdebfd1f84481228d8fccee406399a43f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57452a39b09498bc0f1c753dba424feb56bbef21 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20e237b24cdd4992e4c5e0c6ee1b191c5bc5a2b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56358fe789834a394bea785e64dc49183cf080cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..630681abf67acca1ce5b1bcc869f16229f5699ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_zh_other_tasks +task: 
global_mmlu_full_zh_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e48f35cb38bad9514dad8bf685dc74dfc49951a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f75432cd7d9c9d8c46050b669ac80f040e821df3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbbf45ad76940af904ca88b0ed6705ae5cf015b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f760d2a29df46b1cfd1737f3304925477ed09a49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1dafaf5d754525a6ef06d5d3a6a61196a7c4982b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..549f4ef156f784a95acd6a6d6bec93398944fb5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_sociology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..597dcfa1cca601c17fce1facc62dcb3830220869 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1984c6b00249308ba504b84a754025b38c422cac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_virology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa15c0cb282b9ff2bab168748b13a50b0de85d78 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/zh/utils.py b/lm_eval/tasks/global_mmlu/full/zh/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7df72cb061f0fecba46e15ff9b57552817979afb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + 
"college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/hrm8k/README.md b/lm_eval/tasks/hrm8k/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cd5a1739665aecf9e5e8e6949759c6343aa56060 --- /dev/null +++ b/lm_eval/tasks/hrm8k/README.md @@ -0,0 +1,46 @@ +# HRM8K + +### Paper + +Title: [Understand, Solve and Translate: Bridging the Multilingual Mathematical Reasoning Gap](https://www.arxiv.org/abs/2501.02448) + +Large language models (LLMs) demonstrate exceptional performance on complex reasoning tasks. 
However, despite their strong reasoning capabilities in high-resource languages (e.g., English and Chinese), a significant performance gap persists in other languages. To investigate this gap in Korean, we introduce HRM8K, a benchmark comprising 8,011 English-Korean parallel bilingual math problems. Through systematic analysis of model behaviors, we identify a key finding: these performance disparities stem primarily from difficulties in comprehending non-English inputs, rather than limitations in reasoning capabilities. Based on these findings, we propose UST (Understand, Solve, and Translate), a method that strategically uses English as an anchor for reasoning and solution generation. By fine-tuning the model on 130k synthetically generated data points, UST achieves a 10.91% improvement on the HRM8K benchmark and reduces the multilingual performance gap from 11.6% to 0.7%. Additionally, we show that improvements from UST generalize effectively to different Korean domains, demonstrating that capabilities acquired from machine-verifiable content can be generalized to other areas. We publicly release the benchmark, training dataset, and models. + +Homepage: https://huggingface.co/datasets/HAERAE-HUB/HRM8K + + +### Citation + +``` +@article{ko2025understand, + title={Understand, Solve and Translate: Bridging the Multilingual Mathematical Reasoning Gap}, + author={Ko, Hyunwoo and Son, Guijin and Choi, Dasol}, + journal={arXiv preprint arXiv:2501.02448}, + year={2025} +} +``` + +### Groups and Tasks + +#### Groups + +* `hrm8k`: HRM8K comprises 8,011 instances for evaluation, sourced through a combination of translations from established English benchmarks (e.g., GSM8K, MATH, OmniMath, MMMLU) and original problems curated from existing Korean math exams. This benchmark consists of Korean instructions and questions. +* `hrm8k_en`: English version of `hrm8k`. This benchmark consists of English instructions and questions. 
+ +#### Tasks + +* `hrm8k_{gsm8k|ksm|math|mmmlu|omni_math}` +* `hrm8k_en_{gsm8k|ksm|math|mmmlu|omni_math}` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [x] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? diff --git a/lm_eval/tasks/hrm8k/default/_hrm8k_yaml b/lm_eval/tasks/hrm8k/default/_hrm8k_yaml new file mode 100644 index 0000000000000000000000000000000000000000..18c53d22997061ed43854d449e5b9e64a80e7335 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/_hrm8k_yaml @@ -0,0 +1,22 @@ +dataset_path: HAERAE-HUB/HRM8K +output_type: generate_until +test_split: test +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +process_results: !function utils.process_results +num_fewshot: 0 +generation_kwargs: + until: + - "" + - "<|end_of_text|>" + - "<|endoftext|>" + - "<|im_end|>" + max_gen_toks: 512 + do_sample: false + temperature: 0 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/default/hrm8k.yaml b/lm_eval/tasks/hrm8k/default/hrm8k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc9753f63beb50d2f041245093a186a9b64cd7ae --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k.yaml @@ -0,0 +1,13 @@ +group: hrm8k +task: + - hrm8k_gsm8k + - hrm8k_ksm + - hrm8k_math + - hrm8k_mmmlu + - hrm8k_omni_math +metric_list: + - metric: exact_match + 
aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a46ff5a04378c03d144c225ad164bd6f9b9cb1c4 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: GSM8K +task: hrm8k_gsm8k diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c1f7ac230e402c8a59bf85073b27ec2bf5722c3 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: KSM +task: hrm8k_ksm diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ecdf67cfcc41ad4755a4ca79b40215eb90e3927e --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: MATH +task: hrm8k_math diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20faaaf10f1374df40f6cd0fe57ab8c330909c7d --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml @@ -0,0 +1,4 @@ +include: _hrm8k_yaml +dataset_name: MMMLU +task: hrm8k_mmmlu +doc_to_text: !function utils.doc_to_text_mmmlu diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2dadac2dbdd5aab1e2c5530b0337677f2e92d7e --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: OMNI_MATH +task: hrm8k_omni_math diff --git a/lm_eval/tasks/hrm8k/default/utils.py 
b/lm_eval/tasks/hrm8k/default/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..aaeecd1495fd83dc93af64f92f2b5dd4f3277224 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/utils.py @@ -0,0 +1,285 @@ +import re +from typing import Dict, List + + +def doc_to_text(doc): + text = ( + "주어진 문제를 풀어보세요.\n" + "문제를 푼 후, 최종 답변을 다음과 같은 형식으로 작성하세요: $\\boxed{N}$.\n\n" + f"문제: {doc['question'].strip()}\n답변:" + ) + return text + + +def doc_to_text_mmmlu(doc): + text = ( + "주어진 문제를 풀어보세요.\n" + "문제를 푼 후, 주어진 선택지 (1, 2, 3, 4) 중 최종 선택지를 다음 형식으로 작성하세요: $\\boxed{N}$.\n\n" + f"문제: {doc['question'].strip()}\n답변:" + ) + return text + + +def doc_to_target(doc): + return postprocess(doc["answer"]) + + +def postprocess(s): + s = str(s).strip() + try: + float_value = float(s) + return str(int(float_value)) if float_value.is_integer() else str(float_value) + except Exception: + return s + + +def process_results(doc: dict, results: List[str]) -> Dict[str, int]: + candidate = results[0] + + gold = postprocess(doc["answer"]) + + if not gold: + print(doc, candidate, gold) + if is_equiv(candidate, gold): + retval = 1 + else: + retval = 0 + + results = { + "exact_match": retval, + } + return results + + +def is_equiv(str1, str2, verbose=False): + if str1 is None and str2 is None: + print("WARNING: Both None") + return True + if str1 is None or str2 is None: + return False + + str1, str2 = parse_math_answer(str1), parse_math_answer(str2) + + try: + ss1 = _strip_string(str1) + ss1 = postprocess(ss1) + ss2 = _strip_string(str2) + if verbose: + print(ss1, ss2) + return ss1 == ss2 + except Exception: + return str1 == str2 + + +def parse_math_answer(raw_string): + def remove_boxed(s): + left = "\\boxed{" + try: + assert s[: len(left)] == left + assert s[-1] == "}" + answer = s[len(left) : -1] + if "=" in answer: + answer = answer.split("=")[-1].lstrip(" ") + return answer + except Exception: + return None + + def last_boxed_only_string(string): + idx = string.rfind("\\boxed") + if 
idx < 0: + idx = string.rfind("\\fbox") + if idx < 0: + return None + i = idx + right_brace_idx = None + num_left_braces_open = 0 + while i < len(string): + if string[i] == "{": + num_left_braces_open += 1 + if string[i] == "}": + num_left_braces_open -= 1 + if num_left_braces_open == 0: + right_brace_idx = i + break + i += 1 + + if right_brace_idx is None: + retval = None + else: + retval = string[idx : right_brace_idx + 1] + + return retval + + def get_answer_with_dollar_sign(s): + first_pattern = "\$(.*)\$" + last_match = None + matches = re.findall(first_pattern, s) + if matches: + last_match = matches[-1] + if "=" in last_match: + last_match = last_match.split("=")[-1].lstrip(" ") + return last_match + + def get_answer_without_dollar_sign(s): + last_match = None + if "=" in s: + last_match = s.split("=")[-1].lstrip(" ").rstrip(".") + if "\\n" in last_match: + last_match = last_match.split("\\n")[0] + else: + pattern = "(?:\\$)?\d+(?:\.\d+)?(?![\w\d])" + matches = re.findall(pattern, s) + if matches: + last_match = matches[-1] + return last_match + + if "\\boxed" in raw_string: + answer = remove_boxed(last_boxed_only_string(raw_string)) + else: + answer = get_answer_with_dollar_sign(raw_string) + if not answer: + answer = get_answer_without_dollar_sign(raw_string) + return answer + + +# code from https://github.com/hendrycks/math/blob/main/modeling/math_equivalence.py +def _fix_fracs(string): + substrs = string.split("\\frac") + new_str = substrs[0] + if len(substrs) > 1: + substrs = substrs[1:] + for substr in substrs: + new_str += "\\frac" + if substr[0] == "{": + new_str += substr + else: + try: + assert len(substr) >= 2 + except Exception: + return string + a = substr[0] + b = substr[1] + if b != "{": + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}{" + b + "}" + post_substr + else: + new_str += "{" + a + "}{" + b + "}" + else: + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}" + b + post_substr + else: + 
new_str += "{" + a + "}" + b + string = new_str + return string + + +def _fix_a_slash_b(string): + if len(string.split("/")) != 2: + return string + a = string.split("/")[0] + b = string.split("/")[1] + try: + a = int(a) + b = int(b) + assert string == "{}/{}".format(a, b) + new_string = "\\frac{" + str(a) + "}{" + str(b) + "}" + return new_string + except Exception: + return string + + +def _remove_right_units(string): + # "\\text{ " only ever occurs (at least in the val set) when describing units + if "\\text{ " in string: + splits = string.split("\\text{ ") + assert len(splits) == 2 + return splits[0] + else: + return string + + +def _fix_sqrt(string): + if "\\sqrt" not in string: + return string + splits = string.split("\\sqrt") + new_string = splits[0] + for split in splits[1:]: + if split[0] != "{": + a = split[0] + new_substr = "\\sqrt{" + a + "}" + split[1:] + else: + new_substr = "\\sqrt" + split + new_string += new_substr + return new_string + + +def _strip_string(string): + # linebreaks + string = string.replace("\n", "") + # print(string) + + # remove inverse spaces + string = string.replace("\\!", "") + # print(string) + + # replace \\ with \ + string = string.replace("\\\\", "\\") + # print(string) + + # replace tfrac and dfrac with frac + string = string.replace("tfrac", "frac") + string = string.replace("dfrac", "frac") + # print(string) + + # remove \left and \right + string = string.replace("\\left", "") + string = string.replace("\\right", "") + # print(string) + + # Remove circ (degrees) + string = string.replace("^{\\circ}", "") + string = string.replace("^\\circ", "") + + # remove dollar signs + string = string.replace("\\$", "") + + # remove units (on the right) + string = _remove_right_units(string) + + # remove percentage + string = string.replace("\\%", "") + string = string.replace("\%", "") + + # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." 
is the start of the string + string = string.replace(" .", " 0.") + string = string.replace("{.", "{0.") + # if empty, return empty string + if len(string) == 0: + return string + if string[0] == ".": + string = "0" + string + + # to consider: get rid of e.g. "k = " or "q = " at beginning + if len(string.split("=")) == 2: + if len(string.split("=")[0]) <= 2: + string = string.split("=")[1] + + # fix sqrt3 --> sqrt{3} + string = _fix_sqrt(string) + + # remove spaces + string = string.replace(" ", "") + + # \frac1b or \frac12 --> \frac{1}{b} and \frac{1}{2}, etc. Even works with \frac1{72} (but not \frac{72}1). Also does a/b --> \\frac{a}{b} + string = _fix_fracs(string) + + # manually change 0.5 --> \frac{1}{2} + if string == "0.5": + string = "\\frac{1}{2}" + + # NOTE: X/Y changed to \frac{X}{Y} in dataset, but in simple cases fix in case the model output is X/Y + string = _fix_a_slash_b(string) + + return string diff --git a/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml b/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml new file mode 100644 index 0000000000000000000000000000000000000000..18c53d22997061ed43854d449e5b9e64a80e7335 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml @@ -0,0 +1,22 @@ +dataset_path: HAERAE-HUB/HRM8K +output_type: generate_until +test_split: test +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +process_results: !function utils.process_results +num_fewshot: 0 +generation_kwargs: + until: + - "" + - "<|end_of_text|>" + - "<|endoftext|>" + - "<|im_end|>" + max_gen_toks: 512 + do_sample: false + temperature: 0 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17eac64a82614f69207eeb045394d4e1dfeb9454 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml @@ -0,0 +1,13 @@ +group: hrm8k_en +task: + - 
hrm8k_gsm8k_en + - hrm8k_ksm_en + - hrm8k_math_en + - hrm8k_mmmlu_en + - hrm8k_omni_math_en +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2697a0b8284e3ec72267c46c5094531ccb389f3 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: GSM8K +task: hrm8k_gsm8k_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5e34d452ad000bccf6eeb2bb2af6ecd20729686 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: KSM +task: hrm8k_ksm_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffbdce817608ec3b18e722b94f837295f3f7238c --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: MATH +task: hrm8k_math_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..812f62e268278cb52178aecd364b4329b7aec4c7 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml @@ -0,0 +1,4 @@ +include: _hrm8k_en_yaml +dataset_name: MMMLU +task: hrm8k_mmmlu_en +doc_to_text: !function utils.doc_to_text_mmmlu diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f859de3d03bb5e389223d3d82a9f09bd07f0a1ff --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml 
+dataset_name: OMNI_MATH +task: hrm8k_omni_math_en diff --git a/lm_eval/tasks/hrm8k/en/utils.py b/lm_eval/tasks/hrm8k/en/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b67d8e9105781226b6e5508ee1952daad97ddbf1 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/utils.py @@ -0,0 +1,285 @@ +import re +from typing import Dict, List + + +def doc_to_text(doc): + text = ( + "Solve the given question.\n" + "After solving the problem, state your final answer in the following format: $\\boxed{N}$.\n\n" + f"Question: {doc['original'].strip()}\nAnswer:" + ) + return text + + +def doc_to_text_mmmlu(doc): + text = ( + "Solve the given question.\n" + "After solving the problem, state your final choice among the choices (1, 2, 3, 4) in the following format: $\\boxed{N}$.\n\n" + f"Question: {doc['original'].strip()}\nAnswer:" + ) + return text + + +def doc_to_target(doc): + return postprocess(doc["answer"]) + + +def postprocess(s): + s = str(s).strip() + try: + float_value = float(s) + return str(int(float_value)) if float_value.is_integer() else str(float_value) + except Exception: + return s + + +def process_results(doc: dict, results: List[str]) -> Dict[str, int]: + candidate = results[0] + + gold = postprocess(doc["answer"]) + + if not gold: + print(doc, candidate, gold) + if is_equiv(candidate, gold): + retval = 1 + else: + retval = 0 + + results = { + "exact_match": retval, + } + return results + + +def is_equiv(str1, str2, verbose=False): + if str1 is None and str2 is None: + print("WARNING: Both None") + return True + if str1 is None or str2 is None: + return False + + str1, str2 = parse_math_answer(str1), parse_math_answer(str2) + + try: + ss1 = _strip_string(str1) + ss1 = postprocess(ss1) + ss2 = _strip_string(str2) + if verbose: + print(ss1, ss2) + return ss1 == ss2 + except Exception: + return str1 == str2 + + +def parse_math_answer(raw_string): + def remove_boxed(s): + left = "\\boxed{" + try: + assert s[: len(left)] == left + assert s[-1] == "}" 
+ answer = s[len(left) : -1] + if "=" in answer: + answer = answer.split("=")[-1].lstrip(" ") + return answer + except Exception: + return None + + def last_boxed_only_string(string): + idx = string.rfind("\\boxed") + if idx < 0: + idx = string.rfind("\\fbox") + if idx < 0: + return None + i = idx + right_brace_idx = None + num_left_braces_open = 0 + while i < len(string): + if string[i] == "{": + num_left_braces_open += 1 + if string[i] == "}": + num_left_braces_open -= 1 + if num_left_braces_open == 0: + right_brace_idx = i + break + i += 1 + + if right_brace_idx is None: + retval = None + else: + retval = string[idx : right_brace_idx + 1] + + return retval + + def get_answer_with_dollar_sign(s): + first_pattern = "\$(.*)\$" + last_match = None + matches = re.findall(first_pattern, s) + if matches: + last_match = matches[-1] + if "=" in last_match: + last_match = last_match.split("=")[-1].lstrip(" ") + return last_match + + def get_answer_without_dollar_sign(s): + last_match = None + if "=" in s: + last_match = s.split("=")[-1].lstrip(" ").rstrip(".") + if "\\n" in last_match: + last_match = last_match.split("\\n")[0] + else: + pattern = "(?:\\$)?\d+(?:\.\d+)?(?![\w\d])" + matches = re.findall(pattern, s) + if matches: + last_match = matches[-1] + return last_match + + if "\\boxed" in raw_string: + answer = remove_boxed(last_boxed_only_string(raw_string)) + else: + answer = get_answer_with_dollar_sign(raw_string) + if not answer: + answer = get_answer_without_dollar_sign(raw_string) + return answer + + +# code from https://github.com/hendrycks/math/blob/main/modeling/math_equivalence.py +def _fix_fracs(string): + substrs = string.split("\\frac") + new_str = substrs[0] + if len(substrs) > 1: + substrs = substrs[1:] + for substr in substrs: + new_str += "\\frac" + if substr[0] == "{": + new_str += substr + else: + try: + assert len(substr) >= 2 + except Exception: + return string + a = substr[0] + b = substr[1] + if b != "{": + if len(substr) > 2: + post_substr = 
substr[2:] + new_str += "{" + a + "}{" + b + "}" + post_substr + else: + new_str += "{" + a + "}{" + b + "}" + else: + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}" + b + post_substr + else: + new_str += "{" + a + "}" + b + string = new_str + return string + + +def _fix_a_slash_b(string): + if len(string.split("/")) != 2: + return string + a = string.split("/")[0] + b = string.split("/")[1] + try: + a = int(a) + b = int(b) + assert string == "{}/{}".format(a, b) + new_string = "\\frac{" + str(a) + "}{" + str(b) + "}" + return new_string + except Exception: + return string + + +def _remove_right_units(string): + # "\\text{ " only ever occurs (at least in the val set) when describing units + if "\\text{ " in string: + splits = string.split("\\text{ ") + assert len(splits) == 2 + return splits[0] + else: + return string + + +def _fix_sqrt(string): + if "\\sqrt" not in string: + return string + splits = string.split("\\sqrt") + new_string = splits[0] + for split in splits[1:]: + if split[0] != "{": + a = split[0] + new_substr = "\\sqrt{" + a + "}" + split[1:] + else: + new_substr = "\\sqrt" + split + new_string += new_substr + return new_string + + +def _strip_string(string): + # linebreaks + string = string.replace("\n", "") + # print(string) + + # remove inverse spaces + string = string.replace("\\!", "") + # print(string) + + # replace \\ with \ + string = string.replace("\\\\", "\\") + # print(string) + + # replace tfrac and dfrac with frac + string = string.replace("tfrac", "frac") + string = string.replace("dfrac", "frac") + # print(string) + + # remove \left and \right + string = string.replace("\\left", "") + string = string.replace("\\right", "") + # print(string) + + # Remove circ (degrees) + string = string.replace("^{\\circ}", "") + string = string.replace("^\\circ", "") + + # remove dollar signs + string = string.replace("\\$", "") + + # remove units (on the right) + string = _remove_right_units(string) + + # remove percentage + 
string = string.replace("\\%", "") + string = string.replace("\%", "") + + # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string + string = string.replace(" .", " 0.") + string = string.replace("{.", "{0.") + # if empty, return empty string + if len(string) == 0: + return string + if string[0] == ".": + string = "0" + string + + # to consider: get rid of e.g. "k = " or "q = " at beginning + if len(string.split("=")) == 2: + if len(string.split("=")[0]) <= 2: + string = string.split("=")[1] + + # fix sqrt3 --> sqrt{3} + string = _fix_sqrt(string) + + # remove spaces + string = string.replace(" ", "") + + # \frac1b or \frac12 --> \frac{1}{b} and \frac{1}{2}, etc. Even works with \frac1{72} (but not \frac{72}1). Also does a/b --> \\frac{a}{b} + string = _fix_fracs(string) + + # manually change 0.5 --> \frac{1}{2} + if string == "0.5": + string = "\\frac{1}{2}" + + # NOTE: X/Y changed to \frac{X}{Y} in dataset, but in simple cases fix in case the model output is X/Y + string = _fix_a_slash_b(string) + + return string diff --git a/lm_eval/tasks/humaneval/README.md b/lm_eval/tasks/humaneval/README.md new file mode 100644 index 0000000000000000000000000000000000000000..60bff53b58de97ffe74410d350919e9bdb6a2217 --- /dev/null +++ b/lm_eval/tasks/humaneval/README.md @@ -0,0 +1,46 @@ +# HumanEval + +## Paper +Evaluating Large Language Models Trained on Code +https://arxiv.org/abs/2107.03374 + +We introduce Codex, a GPT language model fine-tuned on publicly available code from GitHub, and study its Python code-writing capabilities. A distinct production version of Codex powers GitHub Copilot. On HumanEval, a new evaluation set we release to measure functional correctness for synthesizing programs from docstrings, our model solves 28.8% of the problems, while GPT-3 solves 0% and GPT-J solves 11.4%. 
Furthermore, we find that repeated sampling from the model is a surprisingly effective strategy for producing working solutions to difficult prompts. Using this method, we solve 70.2% of our problems with 100 samples per problem. Careful investigation of our model reveals its limitations, including difficulty with docstrings describing long chains of operations and with binding operations to variables. Finally, we discuss the potential broader impacts of deploying powerful code generation technologies, covering safety, security, and economics. + +Homepage: https://github.com/openai/human-eval + + +## Citation +``` +@article{chen2021codex, + title={Evaluating Large Language Models Trained on Code}, + author={Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de Oliveira Pinto and Jared Kaplan and Harri Edwards and Yuri Burda and Nicholas Joseph and Greg Brockman and Alex Ray and Raul Puri and Gretchen Krueger and Michael Petrov and Heidy Khlaaf and Girish Sastry and Pamela Mishkin and Brooke Chan and Scott Gray and Nick Ryder and Mikhail Pavlov and Alethea Power and Lukasz Kaiser and Mohammad Bavarian and Clemens Winter and Philippe Tillet and Felipe Petroski Such and Dave Cummings and Matthias Plappert and Fotios Chantzis and Elizabeth Barnes and Ariel Herbert-Voss and William Hebgen Guss and Alex Nichol and Alex Paino and Nikolas Tezak and Jie Tang and Igor Babuschkin and Suchir Balaji and Shantanu Jain and William Saunders and Christopher Hesse and Andrew N. Carr and Jan Leike and Josh Achiam and Vedant Misra and Evan Morikawa and Alec Radford and Matthew Knight and Miles Brundage and Mira Murati and Katie Mayer and Peter Welinder and Bob McGrew and Dario Amodei and Sam McCandlish and Ilya Sutskever and Wojciech Zaremba}, + year={2021}, + eprint={2107.03374}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} +``` + +### Groups and Tasks + +#### Groups + +* Not part of a group yet. 
+ +#### Tasks + +- `humaneval` pass@1 +- `humaneval_64` pass@64 variant + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [ ] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? diff --git a/lm_eval/tasks/humaneval/humaneval.yaml b/lm_eval/tasks/humaneval/humaneval.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e3a8d6d30ed9c312c9eac4e20deba4a6cc61510 --- /dev/null +++ b/lm_eval/tasks/humaneval/humaneval.yaml @@ -0,0 +1,30 @@ +task: humaneval +dataset_path: openai/openai_humaneval +unsafe_code: true +output_type: generate_until +test_split: test +doc_to_text: "{{prompt}}" +doc_to_target: "{{test}}\ncheck({{entry_point}})" +metric_list: + - metric: !function utils.pass_at_k + aggregation: mean + higher_is_better: true + k: [1] +generation_kwargs: + until: + - "\nclass" + - "\ndef" + - "\n#" + - "\nif" + - "\nprint" + max_gen_toks: 1024 + do_sample: false +repeats: 1 +num_fewshot: 0 +filter_list: + - name: "create_test" + filter: + - function: "custom" + filter_fn: !function utils.build_predictions +metadata: + version: 1.0 diff --git a/lm_eval/tasks/humaneval/humaneval_64.yaml b/lm_eval/tasks/humaneval/humaneval_64.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1720ae7c77c1d1b0bae96a4b1153ea6b0893c64d --- /dev/null +++ b/lm_eval/tasks/humaneval/humaneval_64.yaml @@ -0,0 +1,19 @@ +include: humaneval.yaml +task: humaneval_64 +repeats: 64 
+metric_list: + - metric: !function utils.pass_at_k + aggregation: mean + higher_is_better: true + k: [2,8,16,32,64] +generation_kwargs: + until: + - "\nclass" + - "\ndef" + - "\n#" + - "\nif" + - "\nprint" + max_gen_toks: 1024 + do_sample: true + temperature: 0.2 + top_p: 0.95 diff --git a/lm_eval/tasks/humaneval/utils.py b/lm_eval/tasks/humaneval/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9eb7c48ff5a9376bc5c2f832c045627180bf00bf --- /dev/null +++ b/lm_eval/tasks/humaneval/utils.py @@ -0,0 +1,27 @@ +import evaluate as hf_evaluate + + +try: + compute_ = hf_evaluate.load("code_eval") + test_cases = ["assert add(2, 3)==5"] + candidates = [["def add(a,b): return a*b"]] + results = compute_.compute(references=test_cases, predictions=candidates, k=[1]) +except Exception as e: + raise e + + +def pass_at_k(references: list[str], predictions: list[list[str]], k: list[int] = None): + global compute_ + assert k is not None + if isinstance(k, int): + k = [k] + res = compute_.compute( + references=references, + predictions=predictions, + k=k, + ) + return res[0] + + +def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[list[str]]: + return [[doc["prompt"] + r for r in resp] for resp, doc in zip(resps, docs)] diff --git a/lm_eval/tasks/ifeval/instructions.py b/lm_eval/tasks/ifeval/instructions.py index a79cbba4f5e2fec10b8c0525a215b5e967c6fcfc..9a7bcce13b0f29b829f21dea14b8f7ce5baeaac1 100644 --- a/lm_eval/tasks/ifeval/instructions.py +++ b/lm_eval/tasks/ifeval/instructions.py @@ -722,7 +722,7 @@ class RephraseChecker(Instruction): if not self.is_change(value): raise ValueError( - f"value {value} does not contain " "changes in the form of *change me*." + f"value {value} does not contain changes in the form of *change me*." 
) response_without_changes = self.strip_changes(value) diff --git a/lm_eval/tasks/ifeval/instructions_util.py b/lm_eval/tasks/ifeval/instructions_util.py index df58fb30209a86cd461fc29ca37ba89b4e54d583..33e0a0a00c54f301334dc1bcd211dd588e6c9529 100644 --- a/lm_eval/tasks/ifeval/instructions_util.py +++ b/lm_eval/tasks/ifeval/instructions_util.py @@ -35,10 +35,11 @@ RANK = os.environ.get("LOCAL_RANK", "0") def download_nltk_resources(): """Download 'punkt' if not already installed""" - assert ( - (nltk_version := parse_version(version("nltk"))) - >= parse_version(NLTK_MIN_VERSION) - ), f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability." + assert (nltk_version := parse_version(version("nltk"))) >= parse_version( + NLTK_MIN_VERSION + ), ( + f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability." 
+ ) try: nltk.data.find("tokenizers/punkt_tab") diff --git a/lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py b/lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py index 28f270b5eba616457d2f007a4d802302b7e0c78e..0d122c7af1b4862b8f80e949409500de0c0c832f 100644 --- a/lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py +++ b/lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py @@ -23,9 +23,9 @@ def _extract_answer(completion): def process_results(doc, results): - assert ( - len(results) == 1 - ), f"results should be a list with 1 str element, but is {results}" + assert len(results) == 1, ( + f"results should be a list with 1 str element, but is {results}" + ) completion = results[0] extracted_answer = _extract_answer(completion) diff --git a/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml b/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml index 163a03dfd21f3ba21ddc74c8470cf9de0ff29466..0c0fadf7353f0f65999eeb61f42183402aa987c0 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard_cot dataset_path: HAERAE-HUB/KMMLU-HARD output_type: generate_until validation_split: dev # not meant to be used, only here to silence warnings diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e459a05d6157b0b4c69fd03d5a4a22053dcced9 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml @@ -0,0 +1,11 @@ +group: kmmlu_cot_hard +task: + - kmmlu_cot_hard_stem + - kmmlu_cot_hard_other + - kmmlu_cot_hard_applied_science + - kmmlu_cot_hard_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4944cefb6021687487411ace34cca1e3db7e11f7 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_applied_science +task: + - kmmlu_cot_hard_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b30f3588d2a60eede02dce461986140848d8781 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_humss +task: + - kmmlu_cot_hard_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70329cf494af4b4f65ca25deede9cc42fc1566ae --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_other +task: + - kmmlu_cot_hard_other_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65d92fe270424259e93b463bdb35abb172d60610 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_stem +task: + - kmmlu_cot_hard_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml index 
bb17436e43f30e4e0432213c7e0453f908ad7726..0a89dce5e47b1d08c56dcc643738f373fe4d8850 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml @@ -78,4 +78,5 @@ fewshot_config: 당기순이익은 과소 계상됩니다. 왜냐하면 매출원가가 더 높아지면 이익은 줄어들기 때문입니다. , 상품재고액을 과대 계상한 경우 매출원가는 과대 계상되고, 당기순이익은 과소 계상됩니다. '따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_accounting +task: kmmlu_cot_hard_accounting +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml index b100094b57dcb0534c717f1b5b0c7f93d34cde8e..d3ab573490813f1171a8bb6b206517725cf6f366 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml @@ -80,4 +80,5 @@ fewshot_config: 각 선택지를 분석한 결과 (C) 선택지인 '감자의 바이러스 병을 막기 위해 평지에서 채종한다.'가 가장 잘못된 방법으로 보입니다. 이는 감자의 바이러스 병 예방과 평지에서의 채종 사이에 직접적인 연관성이 없기 때문입니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_agricultural_sciences +task: kmmlu_cot_hard_agricultural_sciences +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml index f9cd217f7bd10b0c00588538ddc0fe0d7812837b..dcc59f889f9e57e7e472d29147e2d526e2c8ce80 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml @@ -85,4 +85,5 @@ fewshot_config: (D) 옆놀이의 안정성 향상을 위해서는 트위스트가 중요한 역할을 합니다. 트위스트는 날개 팁 부분의 각도를 조절하여, 항공기가 고속에서도 안정적으로 비행할 수 있도록 돕습니다. 따라서, 정답은 (D) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_aviation_engineering_and_maintenance +task: kmmlu_cot_hard_aviation_engineering_and_maintenance +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml index 4d6e52b77cdae2abdc3e4a37876cd48df98865e9..52e0c77d83c2f9f631d095da3e1d4820dee60faf 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml @@ -80,4 +80,5 @@ fewshot_config: 없어야 합니다. 이러한 조건을 충족하는 미생물은 절대호산성 미생물입니다. 절대호산성 미생물은 극도로 산성 환경에서만 생존할 수 있으며, 중성 또는 알칼리성 환경에서는 성장할 수 없습니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_biology +task: kmmlu_cot_hard_biology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml index 9b7435d3f58e1156f604ba20a96f9392ec3efc7e..49ebe86600a199bd5d6ab5f7f50fa4b6f614901a 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml @@ -87,4 +87,5 @@ fewshot_config: 압력, V는 부피입니다. W = -P1Vln(P2/P1) = -(10×10^5 Pa)(0.05m^3)ln((1×10^5 Pa)/(10×10^5 Pa)) = 0입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_chemical_engineering +task: kmmlu_cot_hard_chemical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml index d761f5e22fe4bdf59e2368643c95d092d8067171..0cfd1dff148cedd50a3125222ea2076d5afe48aa 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml @@ -76,4 +76,5 @@ fewshot_config: 황산의 분자량은 98g/mol입니다. 황산의 몰 수는 49g ÷ 98g/mol = 0.5mol입니다. 이 수용액의 물 농도는 0.5mol/1L = 0.5M입니다. 
따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_chemistry +task: kmmlu_cot_hard_chemistry +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml index 87d3d22e5a9713b11d53c10eda9b2de85ea96523..13893796b0947cff81331f30c18bc72663cd9420 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml @@ -97,4 +97,5 @@ fewshot_config: 것이며, 이 계약은 미국의 근대도시계획 성립기에 지역제의 바탕이 된 제도는 (A) 협약(covenant)이 가장 적절한 선택입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_civil_engineering +task: kmmlu_cot_hard_civil_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml index 463b8e75b371b82b313b5594010e4c51bb5eed70..f83994093a3f51d1a7c3fdf963da10fbbd331ee3 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml @@ -96,4 +96,5 @@ fewshot_config: 주어진 설명에서 언급된 감사 추적(Auditing)이나 Shadow Password와 같은 부가적인 기능보다는 사용자 간 침범 차단과 사용자별 파일 권한 설정에 초점을 맞춘 것으로 정의됩니다. 따라서, 정답은 (B) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_computer_science +task: kmmlu_cot_hard_computer_science +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml index a277f637fa5d5a99989a00e14d68cf81ac679727..3cfb3e9f899ba5fffa5fe4204dec8d476b17ee14 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml @@ -83,4 +83,5 @@ fewshot_config: 압축비가 9입니다. 이를 식에 대입하여 연소실 체적을 계산해 보겠습니다. 행정체적 = 240 압축비 = 9 연소실_체적 = 행정체적 / (압축비 - 1) = 240 / 8 = 30 연소실의 체적은 30cc입니다. 
따라서, 정답은 (B) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_construction +task: kmmlu_cot_hard_construction +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml index fa46f0f45b53373330abe4c26a7b7018e00efb19..559ff679f91b46e0f2ed1de9a25d15d9aee2a08e 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml @@ -106,4 +106,5 @@ fewshot_config: 고려에 따라 변경된 경우에 형법 제1조 제2항이 적용되는 것은 맞지만, 법령의 변경이 있더라도 그것이 반성적 고려에서 비롯된 것이 아니라면 형법 제1조 제2항이 적용되지 않습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_criminal_law +task: kmmlu_cot_hard_criminal_law +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml index 0da8a9a7343e474e25abc7b0f12c4ffcc6f0a333..0d59eaa7af9305ab7d93d8c585481fa424184c7a 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml @@ -88,4 +88,5 @@ fewshot_config: 질소는 질소 가스나 암모니아의 형태로 존재합니다. , 위의 분석에 따르면, (B) 옵션의 설명이 잘못되었습니다. 아연과 코발트는 일반적으로 미량원소로 분류되지만, 이 설명에서는 그들을 다량 원소로 잘못 분류하고 있습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_ecology +task: kmmlu_cot_hard_ecology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml index baaa3bd98027fa8a49218c05f68e4373f5741145..838e0bd5e7c6dc6a31d1eabeee7e12ea32be31fe 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml @@ -87,4 +87,5 @@ fewshot_config: - 7,000,000원)은 어떻게 처리되었을까요? 이 금액은 회사의 이익에서 차감되어야 합니다. 이는 이익잉여금, 즉 회사가 벌어들인 이익에서 퇴직금 등을 지급한 후 남은 금액이 감소한다는 의미입니다. 선택지 D는 옳지 않습니다. 따라서, 정답은 (C) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_economics +task: kmmlu_cot_hard_economics +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml index 0e68a0b06d0b47ca044d997eba2458bc7b341e8b..899c38a803c629c4f579d511d45ffab19efecbec 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml @@ -81,4 +81,5 @@ fewshot_config: 수 있습니다. 하지만, 평생교육법에는 전국평생학습도시협의회의 구성 및 운영에 필요한 사항은 교육부령으로 정한다는 내용은 없습니다. 이를 통해 선택지 (B)가 평생교육법 상평생학습도시에 대한 설명으로 옳지 않다는 것을 알 수 있습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_education +task: kmmlu_cot_hard_education +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml index 3b5af8b76ddfb8bf409f661e1e32415610437041..f68f2ca73ada32e47c81c8519f80781773891826 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml @@ -93,4 +93,5 @@ fewshot_config: Pyrometer)입니다. 이는 흑체 또는 비슷한 조건의 물체로부터 방사되는 가시광선의 강도를 통해 온도를 측정하는 방식을 사용하며, 흑체의 방사율을 1로 설정하여 보정하는 원리를 기반으로 합니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_electrical_engineering +task: kmmlu_cot_hard_electrical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml index 4d1327034f885fee689e3b7f5abfe61c6309b812..ce9ce0e36a51e466d570358353bae6ae2403ff93 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml @@ -79,4 +79,5 @@ fewshot_config: 합니다. 
마지막으로 선택지 (D)는 컴퓨터 시스템의 하드웨어 오류를 발견하고 그에 대한 적절한 조치를 한다는 내용입니다. 이 역시 운영체제의 기능으로, 하드웨어 오류를 감지하고 적절한 처리를 하여 시스템의 안정성을 유지하는 역할을 합니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_electronics_engineering +task: kmmlu_cot_hard_electronics_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml index 8dca183aa92baf841435a6b0bd0dc839893a8091..0c5e18b1e9c5e47f30a8cf839ebdfd70a3451724 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml @@ -85,4 +85,5 @@ fewshot_config: 요인이 아닙니다. , 태양광발전 모듈의 I-V 특성곡선에서 일사량에 따라 가장 많이 변화하는 것은 전류입니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_energy_management +task: kmmlu_cot_hard_energy_management +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml index d9080b078a11033c07a93e1a06a9c9e3fef7d3fe..47de0dca4d1a744aac8fecfac202c4e023b6f166 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml @@ -82,4 +82,5 @@ fewshot_config: 전통적인 구성요소는 아닙니다. 과정분석은 보다 일반적인 용어로, 다양한 맥락에서 사용될 수 있습니다. (D) 목록분석 (Inventory Analysis): 이 역시 LCA의 핵심 단계 중 하나입니다. 따라서, 정답은 (C) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_environmental_science +task: kmmlu_cot_hard_environmental_science +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml index 983a6590a86fd74ef7f8ee4c44cec313a415bfb3..598aad051facef623ce67bb4878b568f92cacd75 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml @@ -84,4 +84,5 @@ fewshot_config: 수선 등을 포함한 종합적인 서비스를 제공하는 것으로 보입니다. 이는 일반적인 클리닝 서비스와는 차별화된 서비스라고 볼 수 있습니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_fashion +task: kmmlu_cot_hard_fashion +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml index 2d3473f07a12bf9cd856787053d13097aac38043..3cbec3d867c03a4512cc796059745e51076026fc 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml @@ -78,4 +78,5 @@ fewshot_config: 이 품종은 상대적으로 높은 온도에 더 민감하게 반응하며, 일장의 변화에는 덜 민감한 특성을 가지고 있어 한국의 기후 특성에서 효과적으로 성장할 수 있는 조건을 가지고 있습니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_food_processing +task: kmmlu_cot_hard_food_processing +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml index a244b95500a8b85379eb56c0352305dbf610b081..49551077b77b0bb5104d60ca137354e52d2bc90e 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml @@ -85,4 +85,5 @@ fewshot_config: 이들은 모두 환경에 해롭습니다. 물은 염소 가스의 재해 방지용으로서의 흡수제나 재해제로서 적합하지 않습니다. 따라서, 정답은 (D) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_gas_technology_and_engineering +task: kmmlu_cot_hard_gas_technology_and_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml index cfc4866a625bb98d16e35e01fa5714afb820b095..961b20cefa2ee21384dbff57b351ead0cef3a7de 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml @@ -75,4 +75,5 @@ fewshot_config: 공식은 실제 거리의 제곱근에 축척분모를 곱한 값이 측정된 면적이 될 것입니다. 이렇게 보면, 공식이 의미하는 바를 잘 나타내는 것 같습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_geomatics +task: kmmlu_cot_hard_geomatics +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml index d5b477911d5af37cad7fbc6873528916c688ef8b..29a432e4f8a709c3cb7400d3d31cbf9f12e47f0e 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml @@ -85,4 +85,5 @@ fewshot_config: 일반적으로는 사업장에서 자체적으로 실시하는 것이 일반적입니다. , 국민건강증진기금의 사용 범위에는 포함되지 않을 수 있습니다. 따라서, 정답은 (D) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_health +task: kmmlu_cot_hard_health +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml index fc8f3dec1af02675463b2b1f42395dfcfbf5cc83..f087d221a7a51c49ccbeb38c52798f240877599f 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml @@ -82,4 +82,5 @@ fewshot_config: 절삭저항의 대부분을 차지합니다. 이러한 정보를 바탕으로, 주분력이 절삭저항의 대부분을 차지하므로, 탄소강을 가공할 때 가장 큰 절삭저항을 주는 것은 주분력일 것이라고 추론할 수 있습니다. 따라서, 정답은 (D) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_industrial_engineer +task: kmmlu_cot_hard_industrial_engineer +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml index aa557282f049e963bcc4b2ddd061955b47aacf3a..3d6fd3ce59615b8b3aa7fdfa3c21d35f71fbfff9 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml @@ -80,4 +80,5 @@ fewshot_config: 피어스 B-E형 발진 회로에서는 컬렉터-이미터 간의 임피던스가 유도성일 때 가장 안정한 발진을 지속할 수 있습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_information_technology +task: kmmlu_cot_hard_information_technology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml index 2b3849d79c4846a5874891322da962fb4d4bbe7c..84a61a70093ad3206edeb302afb0fee363a8591f 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml @@ -92,4 +92,5 @@ fewshot_config: 지칭하지 않으며, 실제 설계 및 계획 과정에서는 보통 최소값, 최대값, 또는 목표값과 같이 더 구체적이고 명확한 기준을 바탕으로 최적치수를 결정합니다. 따라서, 정답은 (C) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_interior_architecture_and_design +task: kmmlu_cot_hard_interior_architecture_and_design +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml index 7498080c53435e432f36448fcb7ea705fcd6533b..18666a70c369a32084e23ac93d56adc07482e006 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml @@ -97,4 +97,5 @@ fewshot_config: 군사 기구로, 흥선대원군은 왕권 강화를 위해 비변사의 기능을 약화시켰습니다. (D) 통상 수교 거부 정책을 추진하였다 - 흥선대원군은 외세의 침략을 막기 위해 통상 수교 거부 정책을 추진하였습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_korean_history +task: kmmlu_cot_hard_korean_history +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml index 0328a01030149e38f12e4bd94a578e102ce384ad..ddbd97b67ebdac99243568a2d1f644abaf444640 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml @@ -81,4 +81,5 @@ fewshot_config: 중 하나입니다. (D) 네트워크 취약성으로 발생하는 문제는 물리적 통제절차의 개선으로 해결해야 한다는 것은, 네트워크 보안 문제를 해결하기 위해 물리적인 통제 절차를 개선하는 것입니다. 이는 네트워크 보안을 강화하는 데 매우 중요한 역할을 합니다. 따라서, 정답은 (C) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_law +task: kmmlu_cot_hard_law +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml index 4c6207bb29c5fc58779df1459019eb5e69d8e608..d1e0d88bba132126dd90fbb7f3c7d2d88b526852 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml @@ -83,4 +83,5 @@ fewshot_config: 선택지는 해칭이 주된 중심선 또는 단면도의 주된 외형선에 대하여 90℃ 기울기로 그린다는 내용인데, 이는 잘못된 내용입니다. 일반적으로 해칭은 45도 기울기로 그려집니다. , 이 선택지는 해칭의 일반적인 원칙을 잘못 설명하고 있습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_machine_design_and_manufacturing +task: kmmlu_cot_hard_machine_design_and_manufacturing +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml index 116289043de2da5ac00079db7561ef602a33a35d..435d762fac73b4524ccf5e02e4bb339f2584ccfb 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml @@ -76,4 +76,5 @@ fewshot_config: 각 부문별로 목표를 정하고 분산된 시스템을 구축하는 것은 물류 시스템의 효율성을 높일 수 있지만, 이는 통합적인 관리가 어려울 수 있습니다. 따라서, 정답은 (B) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_management +task: kmmlu_cot_hard_management +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml index e168371f2d703facf5b6073e290a51c85bbd999d..bb7103eb66318bed9b4af17155f696150afaa58c 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml @@ -98,4 +98,5 @@ fewshot_config: (D) 아르곤: 아르곤도 불활성 기체로, 지방질에 용해되거나 마취 효과를 나타내지 않습니다. 아르곤은 주로 산업 공정에서 보호 가스로 사용됩니다. 따라서, 정답은 (B) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_maritime_engineering +task: kmmlu_cot_hard_maritime_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml index 240d92a2f05061fa6f27a53c6b20fec6341db048..971a106b246a76a38626f8015835d940c9219795 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml @@ -91,4 +91,5 @@ fewshot_config: 있으며, 상담원이 고객의 반론에 대한 자연스러운 대응력을 갖추면 고객의 불만이나 반대를 효과적으로 처리할 수 있습니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_marketing +task: kmmlu_cot_hard_marketing +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml index 59774a1543412d8747584a03fe2f5e54f6506cef..4f5867e25802d21181113ab7c27b5e4c276b23d1 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml @@ -84,4 +84,5 @@ fewshot_config: 구별하는 데 사용될 수 있습니다. 냉간가공은 재결성 온도 이하에서 이루어지며, 열간가공은 재결성 온도 이상에서 이루어집니다. , 냉간가공과 열간가공을 구별하는 기준은 재결성 온도라고 할 수 있습니다. 따라서, 정답은 (C) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_materials_engineering +task: kmmlu_cot_hard_materials_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml index 103bc573e7da3935baa8de5224af0af124869b35..5aa474d2a44e209b712aaf774caf28280a457968 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml @@ -95,4 +95,5 @@ fewshot_config: + ω2019 입니다. , ω^2017 + ω^2019 = ω + 1 입니다. 주어진 식에 ω + 1을 대입하면 ω + 1 + ω + 1 + 1 + 1이 됩니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_math +task: kmmlu_cot_hard_math +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml index a57d0661382d2bbc585551579e03092e947c3628..8d99ba72b10ce12cf79220686028b00cff4735ab 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml @@ -77,4 +77,5 @@ fewshot_config: 어떤 것일까요? V벨트의 단면 크기는 알파벳이 뒤로 갈수록 커집니다 즉, A형은 B형보다 작고, B형은 C형보다 작으며, 이런 식으로 D형, E형으로 진행됩니다. , 주어진 선택지 중에서 가장 단면이 큰 V벨트는 E형일 것입니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_mechanical_engineering +task: kmmlu_cot_hard_mechanical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml index c7ecea17255e9a2826ed9aa680a6ae2d5a6667a6..656b08accbf7134cae354ab3d1386d36c10d79fa 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml @@ -91,4 +91,5 @@ fewshot_config: 시험체의 두께 t를 계산하면 다음과 같습니다. 
t = v / (2f) = 4800 / (2 * 2 * 10^6) = 0.0012m = 1.2mm 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_nondestructive_testing +task: kmmlu_cot_hard_nondestructive_testing +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml index 1e5607a5c096cf44001e148652df467a77e97b1e..30b608259005f2660c8dceb5c2853da626e52954 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml @@ -110,4 +110,5 @@ fewshot_config: 발명에 대해서는 먼저 출원한 자만이 특허를 받을 수 있다고 규정하고 있으므로, 乙은 특허를 받을 수 없습니다. , (D)는 옳은 설명입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_patent +task: kmmlu_cot_hard_patent +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml index 50c159f94718ac15f27408cefff2e40d4d3bb48f..7d8c4e56e6d90373816a9f7d6792a52c6651227b 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml @@ -88,4 +88,5 @@ fewshot_config: 범위에서도 활용되는 전략입니다. 도시의 이미지를 국제적으로 홍보하고, 외국인 투자자나 관광객을 유치하는 것이 도시마케팅의 일부이기 때문입니다. 도시마케팅의 공간적 범위가 국내로만 한정되어 있다는 것은 잘못된 설명입니다. 따라서, 정답은 (D) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_political_science_and_sociology +task: kmmlu_cot_hard_political_science_and_sociology +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml index f86d14e68f91e8c6f35f2440d0eb7f5e8a42dd64..125befe11facba89e8b95030db963400ecb4ef19 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml @@ -95,4 +95,5 @@ fewshot_config: 이러한 분석을 통해 고급 상담자의 특징은 (C) 내담자에게 의도적으로 주의를 기울이고 중요한 정보를 수집하고 인식할 수 있다는 것으로 보입니다. 이는 상담자의 기본적인 역량을 넘어서서 고급 상담자가 갖추어야 할 능력으로 보입니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_psychology +task: kmmlu_cot_hard_psychology +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml index 5cc5c148e8e19f60bb4c8264b0a1800fc31e5120..5627770be03dc4613879b7c738054c82e68eee45 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml @@ -90,4 +90,5 @@ fewshot_config: 산업안전ᆞ보건과 관련된 그 밖의 사항 , 선택지 중에서 산업안전보건위원회의 심의ᆞ의결을 거치지 않아도 되는 사항은 (B) 안전ᆞ보건과 관련된 안전장치 구입 시의 적격품 여부 확인에 관한 사항입니다. 따라서, 정답은 (B) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_public_safety +task: kmmlu_cot_hard_public_safety +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml index c81e158a082332dfb17616e16b26c859cc867148..5b8b436fbd93dc1618563db086c7cbbdb86235c9 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml @@ -85,4 +85,5 @@ fewshot_config: 위한 것입니다. (D) 기관의 과냉 및 소음방지를 위해 일정 회전수 이상 시 슬립 발생: 유체 커플링식 냉각 팬은 기관의 과냉 및 소음 방지를 위해 일정 회전수 이상 시 슬립이 발생합니다. 이는 유체 커플링의 특성 때문입니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_railway_and_automotive_engineering +task: kmmlu_cot_hard_railway_and_automotive_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml index 0e7d81001ba6c163a851a3cdb35d84d6a03d8950..38df431259673bbbb2257c75fa9ae6821e5bd4e1 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml @@ -88,4 +88,5 @@ fewshot_config: 따르면 개업공인중개사는 등록한 관할구역 외의 지역에 있는 중개대상물을 중개할 수 있습니다. 이 내용은 잘못된 내용입니다. 따라서, 정답은 (D) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_real_estate +task: kmmlu_cot_hard_real_estate +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml index 7c3984e4b923675a47c582ebfec110188d0f4801..10624f2afa4378e233522ea85728cf58513a273c 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml @@ -88,4 +88,5 @@ fewshot_config: = 200.15K입니다. 그러므로, W = 1kJ * (300.15K - 200.15K) / 200.15K = 0.5kJ입니다. 따라서, 정답은 (D) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_refrigerating_machinery +task: kmmlu_cot_hard_refrigerating_machinery +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml index d19fb51108b300b56cf3bbe83de806c77ea9b144..64e6fb5a70f812e9c2220b93583391a3a4cbe064 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml @@ -90,4 +90,5 @@ fewshot_config: 이는 사회복지정책의 본질적인 목표와 원칙을 반영하지 못하고 있습니다. 사회복지정책은 능력이 아닌 필요에 따라 지원을 하는 것이 원칙이며, 이를 통해 사회적 불평등을 해소하고 모든 사람이 기본적인 생활을 유지할 수 있도록 지원하는 것이 목표입니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_social_welfare +task: kmmlu_cot_hard_social_welfare +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml index 937a864e21cad34587a6887cb55f69f7ce23eb9d..fbf880678f56325cae52098ded0a7527eac7fe26 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml @@ -104,4 +104,5 @@ fewshot_config: 국가의 안전보장 목적의 수행상 긴요하다고 인정하여 수입하는 물품을 의미합니다. 
이 또한 국가의 안전보장을 위해 필요한 물품이므로 면세 대상에 해당할 것으로 보입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_taxation +task: kmmlu_cot_hard_taxation +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml index ca23afc0bbf16002687bf658961164075bc13662..54c5aac8bff9e12b1cb90ebbfc8863a260412552 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml @@ -83,4 +83,5 @@ fewshot_config: 증가하면, 전자기파의 세기는 1/r^2배 감소합니다. , 거리가 2배가 되면, 전자기파의 세기는 1/4배가 됩니다. 그리고 전력 밀도는 전기장과 자기장의 제곱에 비례하므로, 거리가 2배가 되면 전력 밀도는 1/4배가 됩니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_telecommunications_and_wireless_technology +task: kmmlu_cot_hard_telecommunications_and_wireless_technology +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml b/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml index a0c8dfdc7e8ccce1f61fb55f140b4aca539a8d4e..1ecb5fbab3b24851a8d76c151a0f12f9dd772b40 100644 --- a/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_direct dataset_path: HAERAE-HUB/KMMLU output_type: generate_until test_split: test diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9763d3d4d90a11cb8296941f9d6074f00a2ec482 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml @@ -0,0 +1,11 @@ +group: kmmlu_direct +task: + - kmmlu_direct_stem + - kmmlu_direct_other + - kmmlu_direct_applied_science + - kmmlu_direct_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + 
version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78937b3fac0526d4a4925c8d9103fc5d89a45d54 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_applied_science +task: + - kmmlu_direct_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c8e4f206cc13d34783e711139243bd764323a08 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_humss +task: + - kmmlu_direct_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb5166ec7669175d6c1c92c95f955fdb60b4c758 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_other +task: + - kmmlu_direct_other_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..932cc1e5797d816853420a114dfcfb50ee4f6535 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_stem +task: + - kmmlu_direct_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git 
a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml index d7736e8d5b918f58ffc4dfa19e3e6bd6af898980..d61a84b85d0adcc9c61a81bad09dcd0c7b2180fc 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: Accounting include: _direct_kmmlu_yaml task: kmmlu_direct_accounting +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml index 5bf1fa4b56fdc58cd4219164cc90b11f50886bc1..a8a2829bb5649d61eeef21e754409e2fb8437b71 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: Agricultural-Sciences include: _direct_kmmlu_yaml task: kmmlu_direct_agricultural_sciences +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml index a9a621931a8f387085557e741fc5c22c9755cb7b..d383834ffa8d6e25c67a2229bc61d9250ebcb33d 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: Aviation-Engineering-and-Maintenance include: _direct_kmmlu_yaml task: kmmlu_direct_aviation_engineering_and_maintenance +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml index ebe1765b34a3fe774d45869552d0f69e80285896..aeeb1e520f327c1215d89649c80fee74eea4af4c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml @@ -1,3 +1,4 
@@ dataset_name: Biology include: _direct_kmmlu_yaml task: kmmlu_direct_biology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml index e5875bb7e8be076e5f7a1076b01b21bf308b5acd..921073d5cdeb5fd898a717b0fc37662d8558ae81 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Chemical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_chemical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml index edabfb67dd089798dcc001db737136e55eed0efe..afa5b4b2d6045dd7a7ccd8757e4c3016e9883a68 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: Chemistry include: _direct_kmmlu_yaml task: kmmlu_direct_chemistry +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml index 98ed98dd2cc5f90039d98b74ca0f711809232e14..b8c5064b93cabcd54fd7f1d7fa5fb1380712a11d 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Civil-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_civil_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml index c546e738d68db7e281b5d70bbf9771bced6c1300..bac82f1f45788610e2754edf5873983a11eab562 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml +++ 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: Computer-Science include: _direct_kmmlu_yaml task: kmmlu_direct_computer_science +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml index a0af2a16cfc082d58903758234ed0e36de0333c9..8cb9ada9c214fec51c1ef1f7a8bf29c45fdd3523 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml @@ -1,3 +1,4 @@ dataset_name: Construction include: _direct_kmmlu_yaml task: kmmlu_direct_construction +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml index 9dfdfabc5971164a63fe651c66f4c0842598ef17..642a88bc1430cae3d32b7502179afeac59ecf016 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: Criminal-Law include: _direct_kmmlu_yaml task: kmmlu_direct_criminal_law +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml index 9d182903e2abe1f3c2b3f5d4cbe955bb1bcf58c9..dffbb3c49ff5c1c9491751301e9dc0a976ce2f8e 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: Ecology include: _direct_kmmlu_yaml task: kmmlu_direct_ecology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml index db4d78405a6079273f8042350fd4f785c9fe4bed..1fc5d2c3b85c54e9dc6bb43249d71fdc4505a8ea 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml @@ -1,3 +1,4 @@ 
dataset_name: Economics include: _direct_kmmlu_yaml task: kmmlu_direct_economics +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml index 74887e76f395c2b8565cd7c716fd410f921f6f1d..dc151c8744cd4f59802d1157f74642613d88121b 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml @@ -1,3 +1,4 @@ dataset_name: Education include: _direct_kmmlu_yaml task: kmmlu_direct_education +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml index 3455d50715d250762358c9db89f05a0c8eb521c3..208e7b165de3a0eb11ead6ff13b143c329805654 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Electrical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_electrical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml index b45aa3083cb269c964b4beff2c48a9d1cfcc973c..0a61e3d1a36b5e98971fd4741c167e246543197e 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Electronics-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_electronics_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml index b4fb806b3808d2cb47ea68534030b9432e998b74..085f4246ea731a39ed605aee5d5d482619c70e48 100644 --- 
a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: Energy-Management include: _direct_kmmlu_yaml task: kmmlu_direct_energy_management +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml index 1670ff16bae6d41096f2b9c86f8361455f4c347e..104a4b9ed9045bb9674201434626f565b1ce3a1c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: Environmental-Science include: _direct_kmmlu_yaml task: kmmlu_direct_environmental_science +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml index aef8043aa4605573b074b96b711b6f321d179f44..561e565c7b782aaa76af572b58adae5a489b8048 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: Fashion include: _direct_kmmlu_yaml task: kmmlu_direct_fashion +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml index f49b087fc288187a9a3363260a17bda1a68ce9bb..3050c82aa6bec8593cde036b9a14f97f04aaed6c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: Food-Processing include: _direct_kmmlu_yaml task: kmmlu_direct_food_processing +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml index 
00b7021c5c11fb9a0cae1958e2079e41c5854d4c..708e76d87552d688bed6d2d6fa6d529693bf3143 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Gas-Technology-and-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_gas_technology_and_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml index 5d8dc70db5eabc1af2e29d3c8588dfb04b8dedb1..0937bcfc0fd45cd36fde3bed8b2a4f3eda35a0eb 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: Geomatics include: _direct_kmmlu_yaml task: kmmlu_direct_geomatics +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml index 3f0d77eb78a61cd2b7b00b80311b59b011abc47e..70ef5736686418c69936d7dbbc4cb024268b10cc 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml @@ -1,3 +1,4 @@ dataset_name: Health include: _direct_kmmlu_yaml task: kmmlu_direct_health +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml index 39ea0bcf054c6dfef197beef942a16feffca338b..1454520195d76f3f065c834af760f8ff2eacc959 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: Industrial-Engineer include: _direct_kmmlu_yaml task: kmmlu_direct_industrial_engineer +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml index c42e80eda1ad438d65d1d656671d5fb1542018da..50fc6e91f00b6b15c6861f71fad50be41f79c251 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: Information-Technology include: _direct_kmmlu_yaml task: kmmlu_direct_information_technology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml index 842534aa0a4e87d6aa4bb43b0261b85b7e47676f..638de434507ea7dd7b93fd78a1636cf09ae4fae1 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: Interior-Architecture-and-Design include: _direct_kmmlu_yaml task: kmmlu_direct_interior_architecture_and_design +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml index f1aa277a70d03a617e673c27bba1cc2d7440d156..6d6b20ba10f39562d3b8a6d7a994407e0df312ed 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: Korean-History include: _direct_kmmlu_yaml task: kmmlu_direct_korean_history +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml index 602f8982f6ca939766cf0d87f0546eef5a4452de..296858527260238bc9e4d5b1342699f585b12aed 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml @@ -1,3 +1,4 @@ dataset_name: Law include: _direct_kmmlu_yaml task: kmmlu_direct_law +tag: 
kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml index bfb923c2a9ac76515f3796a5a8c73770ed9fc586..587d25d0e4fce1abccbc5ee12e4613f77609d664 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: Machine-Design-and-Manufacturing include: _direct_kmmlu_yaml task: kmmlu_direct_machine_design_and_manufacturing +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml index 7352a1360b2a0cb32a85e88351cccfad62c142d3..aec441bb022279edca8157e2347507173e37ca02 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml @@ -1,3 +1,4 @@ dataset_name: Management include: _direct_kmmlu_yaml task: kmmlu_direct_management +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml index fa0c8f319f35d3343ec4cd5b3be8247fa8fe3e61..e7e1f12e7f9af7dcbd1775242105b929055dd32b 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Maritime-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_maritime_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml index c3b524d853c19b9943c0e50bf8842632e8971344..10dadc008401647e1e9c58e034937abb5b918f4f 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml +++ 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: Marketing include: _direct_kmmlu_yaml task: kmmlu_direct_marketing +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml index f04e0975a0700c13d9e816c5d37981d22d8f1b6c..d04632665b687445804c20f4d91df0c28d420cb4 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Materials-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_materials_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml index 6c5d28af05edd5bb5c3c9207930c1994068ce1fe..20d17c01db176cf2dc45ac020356e2df0fb5a65e 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml @@ -1,3 +1,4 @@ dataset_name: Math include: _direct_kmmlu_yaml task: kmmlu_direct_math +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml index a253535adb6c44a8fa8340b106539205cbe6c689..3ddb279638c11f01c2f3c549aaa1d35e8d1d98ab 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Mechanical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_mechanical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml index 3b8dc7e7845394754ede20b72534fe889c7c564f..3e37bd1c1ca6dbb97b4ee35a6191304e39c97bcd 100644 --- 
a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: Nondestructive-Testing include: _direct_kmmlu_yaml task: kmmlu_direct_nondestructive_testing +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml index 2afff2c373a4e5a201a233de96d71baf6d980937..e829b99583a0f125856b2385b0bce6c5130c775d 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml @@ -1,3 +1,4 @@ dataset_name: Patent include: _direct_kmmlu_yaml task: kmmlu_direct_patent +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml index 2209abbf05d8f78017fdcdc6b4178d5c48a2305a..adf6c1b7f2b1bebfb57cd27378cd08475fc4fa2d 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: Political-Science-and-Sociology include: _direct_kmmlu_yaml task: kmmlu_direct_political_science_and_sociology +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml index 140302d01f32ab5d0e55cfe01748659536a2262c..a8ccfcbd25825c71ecedfed8599d3fe5bfe863ed 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: Psychology include: _direct_kmmlu_yaml task: kmmlu_direct_psychology +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml index 
5bb16a90d1f5303b919e8f348b3eb79a9f7cf296..5926a45c96b701637a1a6d712449268ca1f118dc 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: Public-Safety include: _direct_kmmlu_yaml task: kmmlu_direct_public_safety +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml index 2a13204a23bbb4be1de93fceb697cb37d8319ae6..fa92c9fb807a322f397e10bc7faa82da8b09cf3b 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Railway-and-Automotive-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_railway_and_automotive_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml index 5a5202b65d8c9ba693f470e953b22ea3b721e84a..e8872a53035ba58ce8ed6a94d19c0c5d65f5d96c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: Real-Estate include: _direct_kmmlu_yaml task: kmmlu_direct_real_estate +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml index 44f9e428bbd8d8c7eb33617a6498d2856a6e1c1a..7378739041e20ef91e64eb4c7a9f0fb42032fc21 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: Refrigerating-Machinery include: _direct_kmmlu_yaml task: kmmlu_direct_refrigerating_machinery +tag: 
kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml index fa13bdff6a4791c8e20fe905a84db0586af11afa..52f731fb370863cabe5895be26ea128790cab0b5 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: Social-Welfare include: _direct_kmmlu_yaml task: kmmlu_direct_social_welfare +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml index 69e71d6dfa6284cc701221c5c187969be5e92832..caa0d7984173349a2e4e7f5cd1f4a8b86a107726 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml @@ -1,3 +1,4 @@ dataset_name: Taxation include: _direct_kmmlu_yaml task: kmmlu_direct_taxation +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml index f4d1fd05c876bf269c0aae1f3590f8801f7e9955..8f98b1d4984b352c7f872292a7ac4e2cd9a7fdae 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: Telecommunications-and-Wireless-Technology include: _direct_kmmlu_yaml task: kmmlu_direct_telecommunications_and_wireless_technology +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml b/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml index 3cf6359206ba07951a7ac08781f8dd6d3fd1450a..f5ed0fda26293003d9ccd37c54d0f4d76da7eea2 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml +++ 
b/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard_direct dataset_path: HAERAE-HUB/KMMLU-HARD output_type: generate_until test_split: test diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54206cdb779c2d7354f9d676731bf8d544a10ab6 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml @@ -0,0 +1,11 @@ +group: kmmlu_direct_hard +task: + - kmmlu_direct_hard_stem + - kmmlu_direct_hard_other + - kmmlu_direct_hard_applied_science + - kmmlu_direct_hard_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f70ae139dd537613f70dd069cacb535a231e44a --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_applied_science +task: + - kmmlu_direct_hard_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b28fdd1522ba82a26d71af523682fbc93d0a6656 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_humss +task: + - kmmlu_direct_hard_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml 
new file mode 100644 index 0000000000000000000000000000000000000000..f216caa648596d1c1ff3bf2597c04352ae1292c5 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_other +task: + - kmmlu_direct_hard_other_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..026c6b48925c3ca3a19af3823846641ba5cac75e --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_stem +task: + - kmmlu_direct_hard_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml index ca805e955ec5ce5cb25e00e321f489646e89628f..d92b933d4bf31038e2aba5339a7ed5de95acf82c 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: accounting include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_accounting +task: kmmlu_direct_hard_accounting +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml index 7348344468bf57bb54a15063d5e59483c17a22c1..d78427d0211db3d8c5a7fbb1c4a93a612416c86d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: agricultural_sciences include: _direct_hard_kmmlu_yaml -task: 
kmmlu_hard_direct_agricultural_sciences +task: kmmlu_direct_hard_agricultural_sciences +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml index 25c91cb6e5e55fcc578bd455086b994f1dd51d8c..6713f04da2495d8790d768e79e13b33ef057433a 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: aviation_engineering_and_maintenance include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_aviation_engineering_and_maintenance +task: kmmlu_direct_hard_aviation_engineering_and_maintenance +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml index a7bc8417b030a06bfd2308384525e6a5b4dcacc4..e98a380f9255dd6afe739a0817361907125e54c4 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml @@ -1,3 +1,4 @@ dataset_name: biology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_biology +task: kmmlu_direct_hard_biology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml index 063974afd2f1ba984722043d50b6c4aaabbc1323..b505e3175f3e9459c8694f9216497c4415e0abf3 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: chemical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_chemical_engineering +task: 
kmmlu_direct_hard_chemical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml index 371db7bfbffb6dfee72baf0482be6d2acea883e4..d805e2340f321dc4a1b0a1b1fca7e1eed5c2e77a 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: chemistry include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_chemistry +task: kmmlu_direct_hard_chemistry +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml index ba2c23b2d1866b4b0dfe71304758e26e94a42a89..30622d50c6811bcfc4bba3a35aa0f6d29246ad0b 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: civil_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_civil_engineering +task: kmmlu_direct_hard_civil_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml index 2a388ff474281c525b8e674f204376c16e522641..bc0f5a37a1203e27a2bf047df386c9f32637e4bb 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: computer_science include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_computer_science +task: kmmlu_direct_hard_computer_science +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml index faab391b9012efdf167a43105649313cb46a1c47..e050e106754accd43523f8dcb1facee64b4ac27b 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml @@ -1,3 +1,4 @@ dataset_name: construction include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_construction +task: kmmlu_direct_hard_construction +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml index d2679f1ecd6dcc2b47de06e3fdf30bb69a9e4a0a..3072b6f0b538a8fc33e2e410da3a43669626c444 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: criminal_law include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_criminal_law +task: kmmlu_direct_hard_criminal_law +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml index adedf9d6e704a36368249260114aa8a80954a24a..3129f467d25af5d389380cefe92b2345f8bf78ff 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: ecology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_ecology +task: kmmlu_direct_hard_ecology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml index f42e5b8dad2a7f4481dbd7d5e476ccccef222ede..87069840e66e636cd2627ed3fb574b1b91019892 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml +++ 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml @@ -1,3 +1,4 @@ dataset_name: economics include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_economics +task: kmmlu_direct_hard_economics +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml index 9c90432fe26075d1c14f84f5765f8e3198deb2ed..75baa1364b434443285205073e1192b195670e17 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml @@ -1,3 +1,4 @@ dataset_name: education include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_education +task: kmmlu_direct_hard_education +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml index 780dad2268fc86c7eb5b590764fd4859997dc7a0..789cdfb81cd5f34842fc2985fb8927e82162a8e4 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electrical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_electrical_engineering +task: kmmlu_direct_hard_electrical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml index e01781549fd0bf1982b895ba2041c3d6f9ec9644..9a1736e0b2584d208cfb90ed777d8de10030e145 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electronics_engineering include: _direct_hard_kmmlu_yaml -task: 
kmmlu_hard_direct_electronics_engineering +task: kmmlu_direct_hard_electronics_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml index d4c2ca7d643d71d3f1464e1f35bd49e944738ee6..4653272e02917c2a01dded35090ee0a0cff4ab27 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: energy_management include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_energy_management +task: kmmlu_direct_hard_energy_management +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml index de511a09f02c411dedba2ac816a34c11b6805caa..60c0253e0f13c9608f22ad5a58a3e10f8527053c 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: environmental_science include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_environmental_science +task: kmmlu_direct_hard_environmental_science +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml index 26f0617dfb641bd11f45f482c7180e12a318a0f5..86bbb9b49c03165daff8c6588f9ca053e798c678 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: fashion include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_fashion +task: kmmlu_direct_hard_fashion +tag: kmmlu_direct_hard_other_tasks diff --git 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml index e48143d2c3bc7a69db87ac5d68f4a8951c1d391d..6b2817d2c0f38faa9c9d46e0f40385a1f928293c 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: food_processing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_food_processing +task: kmmlu_direct_hard_food_processing +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml index eb5211ad857bfe99cc41062f21b8c47d008c3c64..c2d2f4772b86a764f4c73ef391a31acd7a68f787 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: gas_technology_and_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_gas_technology_and_engineering +task: kmmlu_direct_hard_gas_technology_and_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml index a25f3c1a7eefe75cd11ce6d45f62ab898f30922b..9dadc72dc31bf2b7ced96c940a2bdcf3d8ab681f 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: geomatics include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_geomatics +task: kmmlu_direct_hard_geomatics +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml index 0fef809eebe36f65d541ce8741e4e0f2ac054da1..f1bf4c778c8c5ef24d135833edfae70e09cc138d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml @@ -1,3 +1,4 @@ dataset_name: health include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_health +task: kmmlu_direct_hard_health +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml index d7ca26e58ac90c69cb2bffcf7a4d95657b019019..5f7b73ea5fa64072648fcedf88406a66b695ca74 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: industrial_engineer include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_industrial_engineer +task: kmmlu_direct_hard_industrial_engineer +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml index 0f8d01ec926a4dc197015d051b9c763889049ae1..a1c5cf9dbf3369475f573419f127cd386153017e 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: information_technology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_information_technology +task: kmmlu_direct_hard_information_technology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml index 
3b1303810a9fbee6d966095fabbcc773dc489e71..65a20727fc67f1a28db33968384e343ea12d3fc1 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: interior_architecture_and_design include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_interior_architecture_and_design +task: kmmlu_direct_hard_interior_architecture_and_design +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml index c4d595d19636e0698930b82b7f1d6c1605d50e10..c10a9f576ff6e633a9fa7fabb7eebdff1fb01728 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: korean_history include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_korean_history +task: kmmlu_direct_hard_korean_history +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml index 168f0340590d9736548eaeb56335e734d756fdac..96e5514f25195742e62c7632625d0e9e0506a2fe 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml @@ -1,3 +1,4 @@ dataset_name: law include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_law +task: kmmlu_direct_hard_law +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml index 73665b1bc0721e918c06ecc7b4256aceda23f704..50dfd63b230bb0fb68abe387f856be5121f0a5c3 100644 --- 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: machine_design_and_manufacturing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_machine_design_and_manufacturing +task: kmmlu_direct_hard_machine_design_and_manufacturing +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml index 6eb945d27e69a636cea53c1c8ba9a35c569fe7f5..48c339d7439af8e827aef5ec0e0a8391209f86bf 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml @@ -1,3 +1,4 @@ dataset_name: management include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_management +task: kmmlu_direct_hard_management +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml index 4078cf973b90f3e03ac88a7670b3344a159fef2e..937bfd27f20d790f4c768503917c2455f891244e 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: maritime_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_maritime_engineering +task: kmmlu_direct_hard_maritime_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml index 37d62bb1bad3e89181247bc4dfa0d8b9d4abbaaf..1ae4088a1687349fcfc65636610636ecfc96b2f1 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml +++ 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: marketing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_marketing +task: kmmlu_direct_hard_marketing +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml index c1e2645c2b68fb818df88b589c6bc3c87e2fa4ca..432460ebf7b0368ad5b51f69bce0fd80c6f582fd 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: materials_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_materials_engineering +task: kmmlu_direct_hard_materials_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml index f5f3373a8aee37d793e49693b53a5c6bd514cb78..53d2fca14d3d29b1423d2f54b30831ba98dd9d33 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml @@ -1,3 +1,4 @@ dataset_name: math include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_math +task: kmmlu_direct_hard_math +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml index dae55511a963529a8980118cdf6a9971eae611bc..1a3994ea59183b14325aa6741a39096517cbcb4e 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: mechanical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_mechanical_engineering +task: 
kmmlu_direct_hard_mechanical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml index 3ff9583743953fde9d681a9d4c4655b72d7c7e3c..909c502c02556e3a57410355b6e433ff24a03f0d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: nondestructive_testing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_nondestructive_testing +task: kmmlu_direct_hard_nondestructive_testing +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml index d913752b0bb3f9cfd0c47eb8919f4beb6e921adb..d8faf9723755ef53d8be2bdac483009abd10cf12 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml @@ -1,3 +1,4 @@ dataset_name: patent include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_patent +task: kmmlu_direct_hard_patent +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml index 8a5d96b6000a27ff3631fbf4c42b89ea3a41fc9a..0b6505074663bc8e98b391462b8f4b41b924c4cd 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: political_science_and_sociology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_political_science_and_sociology +task: kmmlu_direct_hard_political_science_and_sociology +tag: kmmlu_direct_hard_humss_tasks 
diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml index 9fbf0d3191e885cd1486caf148d1c723ea142ee2..b1a6f7777f22d01310c4b798f44a72b7aa3c7f9b 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: psychology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_psychology +task: kmmlu_direct_hard_psychology +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml index b376c4ebae7574364b1157afd65938237eeca209..3da462946a87b77ab21887c8ca2cf1e1ba26bfb8 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: public_safety include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_public_safety +task: kmmlu_direct_hard_public_safety +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml index 0eb534e579c125e2e9951443649a5fbc084da47f..74e5e02f43da2789a7e99481917160ddf5f369ec 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: railway_and_automotive_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_railway_and_automotive_engineering +task: kmmlu_direct_hard_railway_and_automotive_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml index 9c3df599ee0bae86ec979fabd1b3b118c3034c08..8f23fae524939eba477f18d87347c489dc20183f 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: real_estate include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_real_estate +task: kmmlu_direct_hard_real_estate +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml index f62e8e9559fb0f0cb8795afd7027093b65d822f1..192a1f2c0da7c395e2f6f99b08e3e6bcdf822f94 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: refrigerating_machinery include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_refrigerating_machinery +task: kmmlu_direct_hard_refrigerating_machinery +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml index ad4dc2cf373aab0a4ee7e56c9e5ec66b5cd7bcec..c24babc33af228426aeab675ad0c60fefdd90255 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: social_welfare include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_social_welfare +task: kmmlu_direct_hard_social_welfare +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml index 445ab693d6a3064ea35a169d2d7327f6f0942687..17586af6d69cb01eeeec768b2d22ee8a0755b316 100644 --- 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml @@ -1,3 +1,4 @@ dataset_name: taxation include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_taxation +task: kmmlu_direct_hard_taxation +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml index 498b2fb2d661089325953ea8de407e08fb9d4934..bed0df91c97f7fab0f76210137687c765a834f4d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: telecommunications_and_wireless_technology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_telecommunications_and_wireless_technology +task: kmmlu_direct_hard_telecommunications_and_wireless_technology +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml b/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml index 26c4105b3144701252e44ea7e7d615057fc73beb..b3e6970527d9903d7bcf29397d30f7237a60679c 100644 --- a/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard dataset_path: HAERAE-HUB/KMMLU-HARD output_type: multiple_choice test_split: test @@ -12,8 +9,5 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true metadata: version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml new file mode 100644 index 0000000000000000000000000000000000000000..827e74ec100edf3ce40d55e510a93a248ad48926 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml @@ -0,0 +1,11 @@ 
+group: kmmlu_hard +task: + - kmmlu_hard_stem + - kmmlu_hard_other + - kmmlu_hard_applied_science + - kmmlu_hard_humss +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76d383af040a29ac9d0944c431dc643179d93493 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_applied_science +task: + - kmmlu_hard_applied_science_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39eb5a7a2621b40ede548d1bf31ccdb42917c333 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_humss +task: + - kmmlu_hard_humss_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5759fe8844654211f1535fd570dd64d8d607f870 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_other +task: + - kmmlu_hard_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee14c726413524214d9719cee95a50aa9d1cd621 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_stem +task: + - kmmlu_hard_stem_tasks +aggregate_metric_list: 
+ - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml index 8112903b53f96916fdf07e3fb8e5cb85ce286b16..0c341baac0f7897998f48f1e6b3553023e18ef95 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: accounting include: _hard_kmmlu_yaml task: kmmlu_hard_accounting +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml index 3a20948b62b880cd001f952c8a78908b238391ae..90d284c8f700f0256454a9a73c19b32a64e41b38 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: agricultural_sciences include: _hard_kmmlu_yaml task: kmmlu_hard_agricultural_sciences +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml index 87b3845f28561d4be1a3437995ad08015ac1ae0c..5ec90f362f971a7e7c08d304eabd53f0b0762759 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: aviation_engineering_and_maintenance include: _hard_kmmlu_yaml task: kmmlu_hard_aviation_engineering_and_maintenance +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml index 0a28b7c7caac3173813f0b9cdb81ab27a6b234f7..045e17e7807bd982ffac12bb2375b7126bdef024 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml @@ -1,3 +1,4 @@ 
dataset_name: biology include: _hard_kmmlu_yaml task: kmmlu_hard_biology +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml index 8fc448a81ab4d883e1e7fe6456d5371541356f1e..cbfa42eb2041e55008503afd0034ad096fc975f0 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: chemical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_chemical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml index 366c95026df012c078163a5a8e9080aafda16b32..67c65d659834015cfdd0315bb9244debb1aacf45 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: chemistry include: _hard_kmmlu_yaml task: kmmlu_hard_chemistry +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml index ba1a15ad8cb268adc0aeaa96a06418d18209ecda..58e3c87a84b5ea7e8e4a3e5278c12b959ace12ef 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: civil_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_civil_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml index 4e1f12135248d2cdabf32771fcc4bcbb62de68f5..42f91467679c3c46fa7c05ca296c44974c05feec 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: computer_science include: _hard_kmmlu_yaml task: 
kmmlu_hard_computer_science +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml index 8331379cf222bacb760e18388dd2c21c53a231da..55a5a1d0d99d889eaff35f030b9800a58b332f56 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml @@ -1,3 +1,4 @@ dataset_name: construction include: _hard_kmmlu_yaml task: kmmlu_hard_construction +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml index b7acd49a06687b6b96c2c9e18bac1295bbb7d2b5..14e4d5ad65b838d3df1456c142f5588fa4b43806 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: criminal_law include: _hard_kmmlu_yaml task: kmmlu_hard_criminal_law +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml index 6542c1eef98cbb3080777ed7583d19c7b71659ee..c737b1abaf7c55898a1123d4d4161aab10651cd9 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: ecology include: _hard_kmmlu_yaml task: kmmlu_hard_ecology +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml index 4f1bfba0658e65f3485264af2f92eac3105d93dc..9a0084dc3867f40e85a1b82d6a2d9e0df6725b99 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml @@ -1,3 +1,4 @@ dataset_name: economics include: _hard_kmmlu_yaml task: kmmlu_hard_economics +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml index 
0f6a6a80780dfbaada0f21303e08935f89d2871f..568d094d67e99094758faebc31e3ef441b1d73f6 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml @@ -1,3 +1,4 @@ dataset_name: education include: _hard_kmmlu_yaml task: kmmlu_hard_education +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml index 51625c1ec372785ceea741d6aaff21c47316458d..ad46c486570051b9355acd9a91a07c74631621fb 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electrical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_electrical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml index 252ecc19d5e0bb91763e5efa5ea4edd083967ba8..843c92a056a5d4a47e610f1c1d5cdc144ff4305b 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electronics_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_electronics_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml index 062204f1dea6473a74eeae80db0ed1017b0ccbe2..dcfe7f36c1cf5a64d8208898220f50e411ad8b0c 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: energy_management include: _hard_kmmlu_yaml task: kmmlu_hard_energy_management +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml index d7f32dc5b518796f78896eec6fdd2e1dbf3d2b83..a0ae1b8191f5efeb7523fa78d3d83568748f220f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: environmental_science include: _hard_kmmlu_yaml task: kmmlu_hard_environmental_science +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml index 9448efcf8c4775eab3822be73635d80ba35d0c12..3ba973ba6a03648f05302b424ad809ff2a1571bb 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: fashion include: _hard_kmmlu_yaml task: kmmlu_hard_fashion +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml index 138920efbc29906fd6975b29c9107f6fa80bceda..cd08fe3b99d93cf6aa2dfde0deed6072a6b79478 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: food_processing include: _hard_kmmlu_yaml task: kmmlu_hard_food_processing +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml index 14e213b583ec83cdf173614f0220a91323521f4a..fe30680ae6fc63d75c5ab870cb81b38eab6b21b8 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: gas_technology_and_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_gas_technology_and_engineering +tag: kmmlu_hard_applied_science_tasks diff --git 
a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml index 0370a7a7550b3dc83919b4c01889a89fea89bdb2..53b52e96edcbc0ba14701c764dd9d46863c54083 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: geomatics include: _hard_kmmlu_yaml task: kmmlu_hard_geomatics +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml index c5e2ba98addb3794fccfa9b58bfdd1bb869e1acc..dcd2b179d6b50e5ddc0c642289350535bf86089f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml @@ -1,3 +1,4 @@ dataset_name: health include: _hard_kmmlu_yaml task: kmmlu_hard_health +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml index d3cbef78bfe12d8ac674972b6ae9ebab0ce5ff67..2e8449ffd7128f4c98f91605f9d53647363ece9d 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: industrial_engineer include: _hard_kmmlu_yaml task: kmmlu_hard_industrial_engineer +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml index 4af23d30302b37688bb307509811d332f2376172..86ded35de128bf2e34ef831bc85e7aac4beb4373 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: information_technology include: _hard_kmmlu_yaml task: kmmlu_hard_information_technology +tag: kmmlu_hard_stem_tasks diff --git 
a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml index 76bfe50c340cb571da490f3fe5b8f1e5e5743b8f..55de26414fd67b406e7fef7ee9f9b2299a1b2a63 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: interior_architecture_and_design include: _hard_kmmlu_yaml task: kmmlu_hard_interior_architecture_and_design +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml index 60ff94e7ff39c5d24bcc4be97d11c4ddcbd608a5..4d4152b7945d76ad61e2033c3001d3b326f898dd 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: korean_history include: _hard_kmmlu_yaml task: kmmlu_hard_korean_history +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml index aeec24dcd3c5952ca3acc8d27290f9017868169c..0a75d9041c850f9750e588eea9683efe9e682497 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml @@ -1,3 +1,4 @@ dataset_name: law include: _hard_kmmlu_yaml task: kmmlu_hard_law +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml index 222f89bacd4c549ced153434568fb4b065353c51..210ffd8feb379377d5613aea89d98240c387a94f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: machine_design_and_manufacturing include: _hard_kmmlu_yaml task: 
kmmlu_hard_machine_design_and_manufacturing +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml index 8e9e866499e8d3287c107147472b1ceb89199525..d3f27519e2d3511152bc9ac778b8c7aa615b9cad 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml @@ -1,3 +1,4 @@ dataset_name: management include: _hard_kmmlu_yaml task: kmmlu_hard_management +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml index e68041d5097574d98f10f2ab18570ea8ba96a066..dec43bc8045ca1ac93c38e53b6b43904051a3722 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: maritime_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_maritime_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml index 54a62d6272955eb8701f8f821b68b01cbd409d1d..f86cfe17bc530b2e54c658b70da9d8f8499cc7d5 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: marketing include: _hard_kmmlu_yaml task: kmmlu_hard_marketing +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml index 4582b0f3b407a9f11a960826104c3aac196e7177..684120a077fa5616eb52cbdee9c1603b60ab135b 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: materials_engineering include: _hard_kmmlu_yaml task: 
kmmlu_hard_materials_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml index e563717686f991baa06323a0e9f1d415a74df128..ed125f90bfa20a14fafd64e24c1f18d5ede0544d 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml @@ -1,3 +1,4 @@ dataset_name: math include: _hard_kmmlu_yaml task: kmmlu_hard_math +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml index 9b3adca0b644ef7f6a8ede8a2918a46f40707c1b..b6d00e2e2b960f6c5658d5a37af029ca9dea08e8 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: mechanical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_mechanical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml index 21c25fc87b7e864ededa206454a357dba6ed5ed2..acf3ed9fd9e1909c058393d6230513ba34496cac 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: nondestructive_testing include: _hard_kmmlu_yaml task: kmmlu_hard_nondestructive_testing +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml index 3fcdcd96b136e0872cd530b5261760492b29a5e2..910f11c54c781cc0f443e83827e09b2d0790775d 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml @@ -1,3 +1,4 @@ dataset_name: patent include: _hard_kmmlu_yaml task: kmmlu_hard_patent +tag: kmmlu_hard_other_tasks diff --git 
a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml index 6bb907cb10792070f6043eeeed8f629cd503cbe9..7b7addfdb3f6defb0c41d8d62eb07ed35adbdc53 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: political_science_and_sociology include: _hard_kmmlu_yaml task: kmmlu_hard_political_science_and_sociology +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml index c79cef1f1c2b776fe20f95acd6bf80703a6f48af..a6d8b754e2bac711ba8a698c0dbb8c6ad8b962fe 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: psychology include: _hard_kmmlu_yaml task: kmmlu_hard_psychology +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml index 110bd147e7466fd766795e96aa5964097f28d314..8b04b78e59f49a477864206679bcc060ca87313d 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: public_safety include: _hard_kmmlu_yaml task: kmmlu_hard_public_safety +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml index 31b610f75ea9c23ff71a375c3396d8cfa695517d..358b7e36ab0701226f2331daac734d24e1f84402 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: railway_and_automotive_engineering include: 
_hard_kmmlu_yaml task: kmmlu_hard_railway_and_automotive_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml index bd1b32c85885bdb7530bb80857e0da2ee0797136..9010e2a746f72c4798e2f632e806330280c8bbd5 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: real_estate include: _hard_kmmlu_yaml task: kmmlu_hard_real_estate +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml index 8c7dd139998fb4476dcbe08d1f95249fa340fbcd..5f03b70ba22ceff0fa5f5a59685f38aef3c0f33e 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: refrigerating_machinery include: _hard_kmmlu_yaml task: kmmlu_hard_refrigerating_machinery +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml index 12502a573e51dc7ab45fc42f6ee97e92e9b78b58..24f105e4677e7461087037e51f9f66add272fb35 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: social_welfare include: _hard_kmmlu_yaml task: kmmlu_hard_social_welfare +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml index f0f815abe4038e14321f0d04d26d9c411983e971..7d0bbf86c55f992089437285d57e3f8df5aecae6 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml @@ -1,3 +1,4 @@ dataset_name: taxation include: _hard_kmmlu_yaml task: kmmlu_hard_taxation 
+tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml index 0cb519d11ec046aa947fef00738bdcc062c836fd..c1398c5f32d4b5635f21b627f05f7b8d7971f94b 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: telecommunications_and_wireless_technology include: _hard_kmmlu_yaml task: kmmlu_hard_telecommunications_and_wireless_technology +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kormedmcqa/README.md b/lm_eval/tasks/kormedmcqa/README.md index b4eb11342731678ca361a739acd8352fb9417676..54a666a1a675fb21a66de8dd0e07bddbaa8efcce 100644 --- a/lm_eval/tasks/kormedmcqa/README.md +++ b/lm_eval/tasks/kormedmcqa/README.md @@ -25,20 +25,21 @@ Homepage: https://huggingface.co/datasets/sean0042/KorMedMCQA ### Groups and Tasks -* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, and `kormedmcqa_pharm`. +* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, `kormedmcqa_pharm`, and `kormedmcqa_dentist`. #### Tasks * `kormedmcqa_doctor`: `Official Korean Doctor Examination` * `kormedmcqa_nurse`: `Official Korean Nurse Examination` * `kormedmcqa_pharm`: `Official Korean Pharmacist Examination` +* `kormedmcqa_dentist`: `Official Korean Dentist Examination` ### Checklist For adding novel benchmarks/datasets to the library: -* [x] Is the task an existing benchmark in the literature? - * [x] Have you referenced the original paper that introduced the task? - * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? +* [ ] Is the task an existing benchmark in the literature? 
+ * [ ] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? If other tasks on this dataset are already supported: diff --git a/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml b/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml index d6548334fe19609dc358d0dfb697d5888c10e351..cac2329e7547365ac78538211f7229701aa457ce 100644 --- a/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml +++ b/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml @@ -3,9 +3,10 @@ task: - kormedmcqa_doctor - kormedmcqa_nurse - kormedmcqa_pharm + - kormedmcqa_dentist aggregate_metric_list: - metric: exact_match aggregation: mean weight_by_size: true metadata: - version: 0.0 + version: 2.0 diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml b/lm_eval/tasks/kormedmcqa/_template_yaml similarity index 62% rename from lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml rename to lm_eval/tasks/kormedmcqa/_template_yaml index d130dbe8114a7028d647a83229b75813988296d1..1dae2062d184e8190e80ea686574849825439493 100644 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml +++ b/lm_eval/tasks/kormedmcqa/_template_yaml @@ -1,10 +1,10 @@ -task : kormedmcqa_doctor dataset_path : sean0042/KorMedMCQA -dataset_name : doctor test_split : test -fewshot_split : dev +fewshot_split : fewshot fewshot_config: sampler: first_n + doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답: {{['A', 'B', 'C', 'D', 'E'][answer-1]}}\n\n" + doc_to_target: "" output_type: generate_until doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:" doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" @@ -15,12 +15,19 @@ metric_list: ignore_case: true ignore_punctuation: true regexes_to_ignore: - - " " + - " " + - "\n" generation_kwargs: until: - "Q:" - - "\n\n" - "" + - "<|im_end|>" - "." 
+ - "\n\n" do_sample: false temperature: 0.0 + max_gen_toks: 1024 +metadata: + version: 2.0 +dataset_kwargs: + trust_remote_code: true diff --git a/lm_eval/tasks/kormedmcqa/dentist.yaml b/lm_eval/tasks/kormedmcqa/dentist.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a46c7715ab0ca72aaf8eed80532db467aa188bc --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/dentist.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: dentist +task: kormedmcqa_dentist diff --git a/lm_eval/tasks/kormedmcqa/doctor.yaml b/lm_eval/tasks/kormedmcqa/doctor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aac30e4cdf20d9fb536aa91f07474e99131d6eaa --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/doctor.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: doctor +task: kormedmcqa_doctor diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml b/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml deleted file mode 100644 index 026b6217addc6e5d537f389e89a0f95a5dc0dd09..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml +++ /dev/null @@ -1,26 +0,0 @@ -task : kormedmcqa_nurse -dataset_path : sean0042/KorMedMCQA -dataset_name : nurse -test_split : test -fewshot_split : dev -fewshot_config: - sampler: first_n -output_type: generate_until -doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:" -doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" -metric_list: - - metric: exact_match - aggregation: mean - higher_is_better: true - ignore_case: true - ignore_punctuation: true - regexes_to_ignore: - - " " -generation_kwargs: - until: - - "Q:" - - "\n\n" - - "" - - "." 
- do_sample: false - temperature: 0.0 diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml b/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml deleted file mode 100644 index 91279dd7057d25ee94c7b99529f3521960a29265..0000000000000000000000000000000000000000 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml +++ /dev/null @@ -1,26 +0,0 @@ -task : kormedmcqa_pharm -dataset_path : sean0042/KorMedMCQA -dataset_name : pharm -test_split : test -fewshot_split : dev -fewshot_config: - sampler: first_n -output_type: generate_until -doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:" -doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" -metric_list: - - metric: exact_match - aggregation: mean - higher_is_better: true - ignore_case: true - ignore_punctuation: true - regexes_to_ignore: - - " " -generation_kwargs: - until: - - "Q:" - - "\n\n" - - "" - - "." - do_sample: false - temperature: 0.0 diff --git a/lm_eval/tasks/kormedmcqa/nurse.yaml b/lm_eval/tasks/kormedmcqa/nurse.yaml new file mode 100644 index 0000000000000000000000000000000000000000..95894a5d73f17d39fe1ae0061d8dca7045a863df --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/nurse.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: nurse +task: kormedmcqa_nurse diff --git a/lm_eval/tasks/kormedmcqa/pharm.yaml b/lm_eval/tasks/kormedmcqa/pharm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8075fae3f8666e93eeefc0dafaf1b70adb48f6af --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/pharm.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: pharm +task: kormedmcqa_pharm diff --git a/lm_eval/tasks/leaderboard/ifeval/instructions.py b/lm_eval/tasks/leaderboard/ifeval/instructions.py index a79cbba4f5e2fec10b8c0525a215b5e967c6fcfc..9a7bcce13b0f29b829f21dea14b8f7ce5baeaac1 100644 --- a/lm_eval/tasks/leaderboard/ifeval/instructions.py +++ b/lm_eval/tasks/leaderboard/ifeval/instructions.py @@ -722,7 +722,7 @@ class RephraseChecker(Instruction): if 
not self.is_change(value): raise ValueError( - f"value {value} does not contain " "changes in the form of *change me*." + f"value {value} does not contain changes in the form of *change me*." ) response_without_changes = self.strip_changes(value) diff --git a/lm_eval/tasks/leaderboard/ifeval/instructions_util.py b/lm_eval/tasks/leaderboard/ifeval/instructions_util.py index 9ca2d4de4dd013f9766eb877a780ca40061887d6..6993e41808401f0512c277886dae8683750491bd 100644 --- a/lm_eval/tasks/leaderboard/ifeval/instructions_util.py +++ b/lm_eval/tasks/leaderboard/ifeval/instructions_util.py @@ -34,9 +34,9 @@ NLTK_MIN_VERSION = "3.9.1" def download_nltk_resources(): """Download 'punkt' if not already installed""" nltk_version = pkg_resources.get_distribution("nltk").version - assert ( - version.parse(nltk_version) >= version.parse(NLTK_MIN_VERSION) - ), f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability." + assert version.parse(nltk_version) >= version.parse(NLTK_MIN_VERSION), ( + f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability." 
+ ) try: nltk.data.find("tokenizers/punkt_tab") diff --git a/lm_eval/tasks/leaderboard/musr/utils.py b/lm_eval/tasks/leaderboard/musr/utils.py index 1d0a7d1ca0c98a4ae0641b0520e693826090b7b9..eb17a52914c3230367eca37d9e51e1864615b249 100644 --- a/lm_eval/tasks/leaderboard/musr/utils.py +++ b/lm_eval/tasks/leaderboard/musr/utils.py @@ -8,7 +8,7 @@ def doc_to_choice(doc): return ast.literal_eval(doc["choices"]) -DOC_TO_TEXT = "{narrative}\n\n" "{question}\n\n" "{choices}\n" "Answer:" +DOC_TO_TEXT = "{narrative}\n\n{question}\n\n{choices}\nAnswer:" def doc_to_text(doc): @@ -17,7 +17,7 @@ def doc_to_text(doc): """ choices = "" for i, choice in enumerate(ast.literal_eval(doc["choices"])): - choices += f"{i+1} - {choice}\n" + choices += f"{i + 1} - {choice}\n" text = DOC_TO_TEXT.format( narrative=doc["narrative"], question=doc["question"], choices=choices diff --git a/lm_eval/tasks/lingoly/utils.py b/lm_eval/tasks/lingoly/utils.py index 21051d7798ba94bfab6f8867496b3977f7a60424..b4044228435c986605260512086513c8b9137af3 100644 --- a/lm_eval/tasks/lingoly/utils.py +++ b/lm_eval/tasks/lingoly/utils.py @@ -14,13 +14,13 @@ def load_questionsheet(qsheet: dict, no_context: bool = False): all_subquestions += "\n" if no_context: - prompt = f"""{qsheet['preamble']} + prompt = f"""{qsheet["preamble"]} {all_subquestions} """ else: - prompt = f"""{qsheet['preamble']} - {qsheet['context']} + prompt = f"""{qsheet["preamble"]} + {qsheet["context"]} {all_subquestions} """ diff --git a/lm_eval/tasks/llama3/README.md b/lm_eval/tasks/llama3/README.md index 5d45cbb47a33a3c19259c6781d0c8ef7fea1d150..1976b62a8978280c949074a4f3106322c49c9cc0 100644 --- a/lm_eval/tasks/llama3/README.md +++ b/lm_eval/tasks/llama3/README.md @@ -2,43 +2,48 @@ ### Paper -Title: `` +Title: LLAMA Evals -Abstract: `` +Abstract: Evals reproducing those provided by the LLAMA team in the Hugging Face repo. 
+`Short description of paper / benchmark goes here:` -Homepage: `` - +Homepage: `https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f` +Note: The tasks are formatted to be run with apply_chat_template and fewshot_as_multiturn. ### Citation ``` - +BibTeX-formatted citation goes here ``` ### Groups, Tags, and Tasks #### Groups +* `group_name`: `Short description` +#### Tags -#### Subgroups +* `tag_name`: `Short description` +#### Tasks ### Tasks -* `llama_arc_challenge`: 25-shot multiple-choice ARC challenge. * `mgsm_chat`: 0-shot mgsm benchmark. Use with chat-template. +* `mmlu_llama`: `generation variant of MMLU` +* `arc_challenge_chat`: `generation variant of ARC-Challenge using MMLU format` ### Checklist For adding novel benchmarks/datasets to the library: -* [x] Is the task an existing benchmark in the literature? - * [x] Have you referenced the original paper that introduced the task? - * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? +* [ ] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? If other tasks on this dataset are already supported: -* [x] Is the "Main" variant of this task clearly denoted? -* [x] Have you provided a short sentence in a README on what each new variant adds / evaluates? -* [x] Have you noted which, if any, published evaluation setups are matched by this variant? +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? 
diff --git a/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml b/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..7afb094be4954ebc954e727d72aaadc96d482c90 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml @@ -0,0 +1,32 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +output_type: generate_until +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +doc_to_text: "Given the following question and four candidate answers (A, B, C and D), choose the best answer.\nQuestion: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nYour response should end with \"The best answer is [the_answer_letter]\" where the [the_answer_letter] is one of A, B, C or D." +gen_prefix: "The best answer is" +doc_to_target: "{{['A.','B.','C.','D.'][answer]}}" +num_fewshot: 5 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true + regexes_to_ignore: + - "\\$" + - "\\.$" +generation_kwargs: + until: + - "." 
+ max_gen_toks: 10 +filter_list: + - name: strict_match + filter: + - function: remove_whitespace + - function: take_first +metadata: + version: 1.0 +dataset_kwargs: + trust_remote_code: true diff --git a/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_humanities.yaml b/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e02c3e98d7cbbb8a4565befb1855689b4e641843 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_humanities.yaml @@ -0,0 +1,11 @@ +group: mmlu_llama_humanities +group_alias: humanities +task: + - mmlu_llama_humanities_tasks +aggregate_metric_list: + - metric: exact_match + aggregation: mean + weight_by_size: True + filter_list: [strict_match] +metadata: + version: 1 diff --git a/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_other.yaml b/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..baa9742d9913ffa8c680c1882e668cdae491bb66 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_other.yaml @@ -0,0 +1,11 @@ +group: mmlu_llama_other +group_alias: other +task: + - mmlu_llama_other_tasks +aggregate_metric_list: + - metric: exact_match + aggregation: mean + weight_by_size: True + filter_list: [strict_match] +metadata: + version: 1 diff --git a/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_social_sciences.yaml b/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d4860a2a3ce4e963a04ffe267b27a8110be012b --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_social_sciences.yaml @@ -0,0 +1,11 @@ +group: mmlu_llama_social_sciences +group_alias: social sciences +task: + - mmlu_llama_social_sciences_tasks +aggregate_metric_list: + - metric: exact_match + aggregation: mean + weight_by_size: True + filter_list: [strict_match] +metadata: + version: 1 diff --git a/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_stem.yaml 
b/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0c0c829c146f7924e638fbce0aa2e485c77592b --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/_mmlu_stem.yaml @@ -0,0 +1,11 @@ +group: mmlu_llama_stem +group_alias: stem +task: + - mmlu_llama_stem_tasks +aggregate_metric_list: + - metric: exact_match + aggregation: mean + weight_by_size: True + filter_list: [strict_match] +metadata: + version: 0 diff --git a/lm_eval/tasks/llama3/instruct/mmlu/llama.yaml b/lm_eval/tasks/llama3/instruct/mmlu/llama.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4fdeedab6d11629b64a78d9f93a1fb186d7898a --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/llama.yaml @@ -0,0 +1,13 @@ +group: mmlu_llama +task: + - mmlu_llama_stem + - mmlu_llama_other + - mmlu_llama_social_sciences + - mmlu_llama_humanities +aggregate_metric_list: + - metric: exact_match + aggregation: mean + weight_by_size: True + filter_list: [strict_match] +metadata: + version: 1 diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_abstract_algebra.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21cef5c0530e2dfc5a13f1d74f3e6ccc6c4dfe25 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_abstract_algebra.yaml @@ -0,0 +1,5 @@ +"dataset_name": "abstract_algebra" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_abstract_algebra" +"task_alias": "abstract algebra" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_anatomy.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdcd5c4d8cbd7b7b3669f294168f34763a0dd1d7 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_anatomy.yaml @@ -0,0 +1,5 @@ +"dataset_name": "anatomy" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" 
+"task": "mmlu_llama_anatomy" +"task_alias": "anatomy" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_astronomy.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79fe806d3c68e3ac54c3ebe4b9b77f83be22edbd --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_astronomy.yaml @@ -0,0 +1,5 @@ +"dataset_name": "astronomy" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_astronomy" +"task_alias": "astronomy" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_business_ethics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3e060b2f3a49a8c2d03a33f730bc3dab5551e6b --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_business_ethics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "business_ethics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_business_ethics" +"task_alias": "business ethics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5460bcfdc9ec506ece2e5030c8ddfb8bd3267ece --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +"dataset_name": "clinical_knowledge" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_clinical_knowledge" +"task_alias": "clinical knowledge" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_biology.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0288106f3bf45df5e50c723edf0fdcef41394e65 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_biology.yaml @@ -0,0 +1,5 @@ +"dataset_name": "college_biology" 
+"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_college_biology" +"task_alias": "college biology" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_chemistry.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1dbe75c48fc7def89ad9b50388990d72751d7b67 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_chemistry.yaml @@ -0,0 +1,5 @@ +"dataset_name": "college_chemistry" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_college_chemistry" +"task_alias": "college chemistry" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_computer_science.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2493a798ede208654896d74f408859b61e1f55f5 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_computer_science.yaml @@ -0,0 +1,5 @@ +"dataset_name": "college_computer_science" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_college_computer_science" +"task_alias": "college computer science" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_mathematics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8fb8ab3b744162fc95ef15ab25115a22f2c56e5e --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_mathematics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "college_mathematics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_college_mathematics" +"task_alias": "college mathematics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_medicine.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_medicine.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..911777b4fe6df29e5c9a03756f5a0c7248d7f5e2 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_medicine.yaml @@ -0,0 +1,5 @@ +"dataset_name": "college_medicine" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_college_medicine" +"task_alias": "college medicine" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_physics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70f6b99572de6a44143120f3437823ba533741dc --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_college_physics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "college_physics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_college_physics" +"task_alias": "college physics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_computer_security.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..893b0ac972c6f9fc9740ee1bd45c8fef28469a0f --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_computer_security.yaml @@ -0,0 +1,5 @@ +"dataset_name": "computer_security" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_computer_security" +"task_alias": "computer security" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_conceptual_physics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e31df629a3b2f62f6e736d3fa20b8ef44a97af1 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_conceptual_physics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "conceptual_physics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_conceptual_physics" +"task_alias": "conceptual physics" diff --git 
a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_econometrics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44a57ce456e57750791fe2c2595e1f8d13247f3a --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_econometrics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "econometrics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_econometrics" +"task_alias": "econometrics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_electrical_engineering.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..064878388d1128df31aa0c232798b7e4c4f6b78f --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_electrical_engineering.yaml @@ -0,0 +1,5 @@ +"dataset_name": "electrical_engineering" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_electrical_engineering" +"task_alias": "electrical engineering" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2790ebeb5506ab2676b5aa58b3cac63faaa3c57 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "elementary_mathematics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_elementary_mathematics" +"task_alias": "elementary mathematics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_formal_logic.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..63cd15ce0c7883db84e5a67eb92cd6ada9d62639 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_formal_logic.yaml @@ -0,0 +1,5 @@ +"dataset_name": 
"formal_logic" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_formal_logic" +"task_alias": "formal logic" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_global_facts.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23f4c3f00b6bbefaf9e98669457bf718fdbda283 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_global_facts.yaml @@ -0,0 +1,5 @@ +"dataset_name": "global_facts" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_global_facts" +"task_alias": "global facts" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_biology.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e414106255960b4fbc6fffa9aa98fe919d6c2ae --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_biology.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_biology" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_high_school_biology" +"task_alias": "high school biology" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9dd100c106a68d809ada72aaa2605e77a24d305e --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_chemistry" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_high_school_chemistry" +"task_alias": "high school chemistry" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_computer_science.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..073fa7799855681367307909f5b9708d95dc4d62 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_computer_science" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_high_school_computer_science" +"task_alias": "high school computer science" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_european_history.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b34c0aa70a141a93fed82a4a5215e40a613366c --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_european_history.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_european_history" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_high_school_european_history" +"task_alias": "high school european history" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_geography.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dcab35b96c7893630ef03f24d5e42cca98d9a1b9 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_geography.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_geography" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_high_school_geography" +"task_alias": "high school geography" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8f7f40d5b347fb5fe45d5dea8b21ae93123637d --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ 
+"dataset_name": "high_school_government_and_politics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_high_school_government_and_politics" +"task_alias": "high school government and politics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2f8cbb127d034065c723a148015d214227b76c8 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_macroeconomics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_high_school_macroeconomics" +"task_alias": "high school macroeconomics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5ffff4c1f962f61887091e849536f215c035262 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_mathematics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_high_school_mathematics" +"task_alias": "high school mathematics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76344bbea35764687c3c9b5aab78129a06c10f6e --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_microeconomics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_high_school_microeconomics" +"task_alias": 
"high school microeconomics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_physics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d63c025add7c0424e0db219b7f12c00f43f33c3 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_physics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_physics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_high_school_physics" +"task_alias": "high school physics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_psychology.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c894e52e3b498b1b4cb1fe83110512ecc272ec73 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_psychology.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_psychology" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_high_school_psychology" +"task_alias": "high school psychology" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_statistics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d7922dc6addc5f3d57919ac9f6cd7f4ba3d9cf9 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_statistics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_statistics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": "mmlu_llama_high_school_statistics" +"task_alias": "high school statistics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_us_history.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1de61e8bf73f7750b6d96ab1d177db1067a4cd5a --- /dev/null +++ 
b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_us_history.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_us_history" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_high_school_us_history" +"task_alias": "high school us history" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_world_history.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef7759866f475b5df6eafa2e7b82f8c591c0ea84 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_high_school_world_history.yaml @@ -0,0 +1,5 @@ +"dataset_name": "high_school_world_history" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_high_school_world_history" +"task_alias": "high school world history" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_human_aging.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f40c3f82054c2063f58995323d975ce8e6b2b68 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_human_aging.yaml @@ -0,0 +1,5 @@ +"dataset_name": "human_aging" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_human_aging" +"task_alias": "human aging" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_human_sexuality.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbafdb70eaefec40853f46f70313d8b581ecb670 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_human_sexuality.yaml @@ -0,0 +1,5 @@ +"dataset_name": "human_sexuality" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_human_sexuality" +"task_alias": "human sexuality" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_international_law.yaml 
b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bba7fe02a4c813b20c1756572af1e95a9003a611 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_international_law.yaml @@ -0,0 +1,5 @@ +"dataset_name": "international_law" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_international_law" +"task_alias": "international law" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_jurisprudence.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54987158b1ca4c8e3404759652fbdcf9ea7fa998 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_jurisprudence.yaml @@ -0,0 +1,5 @@ +"dataset_name": "jurisprudence" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_jurisprudence" +"task_alias": "jurisprudence" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_logical_fallacies.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c3185164d3975e25ef8dec78c78e4e80940a448 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_logical_fallacies.yaml @@ -0,0 +1,5 @@ +"dataset_name": "logical_fallacies" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_logical_fallacies" +"task_alias": "logical fallacies" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_machine_learning.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2347f96dcbd3bc122894598cb64e6a3747d20ddc --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_machine_learning.yaml @@ -0,0 +1,5 @@ +"dataset_name": "machine_learning" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_stem_tasks" +"task": 
"mmlu_llama_machine_learning" +"task_alias": "machine learning" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_management.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31dbcb38c89f2d5b55370c7059ce7e6794861460 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_management.yaml @@ -0,0 +1,5 @@ +"dataset_name": "management" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_management" +"task_alias": "management" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_marketing.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fb88081bc00a98c9d385b7cab3047a1210f26f8 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_marketing.yaml @@ -0,0 +1,5 @@ +"dataset_name": "marketing" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_marketing" +"task_alias": "marketing" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_medical_genetics.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44509f7c1529f20a6a525c437dd66591d8640874 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_medical_genetics.yaml @@ -0,0 +1,5 @@ +"dataset_name": "medical_genetics" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_medical_genetics" +"task_alias": "medical genetics" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_miscellaneous.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09f3c11d6d928b6dcb6bb67eea4dec4c8ee64828 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_miscellaneous.yaml @@ -0,0 +1,5 @@ +"dataset_name": "miscellaneous" +"include": "_continuation_template_yaml" +"tag": 
"mmlu_llama_other_tasks" +"task": "mmlu_llama_miscellaneous" +"task_alias": "miscellaneous" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_moral_disputes.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5ade37e8425d254c40903a8e0ae40fade90e9db --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_moral_disputes.yaml @@ -0,0 +1,5 @@ +"dataset_name": "moral_disputes" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_moral_disputes" +"task_alias": "moral disputes" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_moral_scenarios.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..339046e4513073d4f3f3fb444fe3d66d269add4e --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_moral_scenarios.yaml @@ -0,0 +1,5 @@ +"dataset_name": "moral_scenarios" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_moral_scenarios" +"task_alias": "moral scenarios" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_nutrition.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d425d51a8b3d066684086ec4f4aa1205270c84aa --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_nutrition.yaml @@ -0,0 +1,5 @@ +"dataset_name": "nutrition" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_nutrition" +"task_alias": "nutrition" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_philosophy.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf8cf7f66c36ff54755de713e2af961f3ba289d6 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_philosophy.yaml @@ -0,0 +1,5 @@ +"dataset_name": "philosophy" 
+"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_philosophy" +"task_alias": "philosophy" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_prehistory.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0190b8321901ca5a6fe92cc1cb03feb0f919b1c3 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_prehistory.yaml @@ -0,0 +1,5 @@ +"dataset_name": "prehistory" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_prehistory" +"task_alias": "prehistory" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_accounting.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d0ddbd466d778794571e0a613b0bee3b2c80a80 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_accounting.yaml @@ -0,0 +1,5 @@ +"dataset_name": "professional_accounting" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_professional_accounting" +"task_alias": "professional accounting" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_law.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f351cfe5881829f478dacdfb79f27ef9fd04712e --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_law.yaml @@ -0,0 +1,5 @@ +"dataset_name": "professional_law" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_professional_law" +"task_alias": "professional law" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_medicine.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_medicine.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..73ec7b834b4d464b209fa85a452b5e1730553909 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_medicine.yaml @@ -0,0 +1,5 @@ +"dataset_name": "professional_medicine" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_professional_medicine" +"task_alias": "professional medicine" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_psychology.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12ceda4d62ac4ebb3a103a3aa95ebb3efb893f5b --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_professional_psychology.yaml @@ -0,0 +1,5 @@ +"dataset_name": "professional_psychology" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_professional_psychology" +"task_alias": "professional psychology" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_public_relations.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52f859ac30552e208b7978c0081a78fbd8a219e4 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_public_relations.yaml @@ -0,0 +1,5 @@ +"dataset_name": "public_relations" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_public_relations" +"task_alias": "public relations" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_security_studies.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba238fd0b4a7214de8e6aca69dd209d98087b17f --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_security_studies.yaml @@ -0,0 +1,5 @@ +"dataset_name": "security_studies" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": 
"mmlu_llama_security_studies" +"task_alias": "security studies" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_sociology.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8baeec70e42f211f537140615396b06a09a5bec9 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_sociology.yaml @@ -0,0 +1,5 @@ +"dataset_name": "sociology" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_sociology" +"task_alias": "sociology" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c91e1c5d0cb59a1cb545414dba9a3b27f2eb526 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +"dataset_name": "us_foreign_policy" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_social_sciences_tasks" +"task": "mmlu_llama_us_foreign_policy" +"task_alias": "us foreign policy" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_virology.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8dfc6b707df711e531782e0cc2ae4e9042e577e --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_virology.yaml @@ -0,0 +1,5 @@ +"dataset_name": "virology" +"include": "_continuation_template_yaml" +"tag": "mmlu_llama_other_tasks" +"task": "mmlu_llama_virology" +"task_alias": "virology" diff --git a/lm_eval/tasks/llama3/instruct/mmlu/mmlu_world_religions.yaml b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a6ff8fe6709ff595c8c8331695cbf8ce81a2ae7 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu/mmlu_world_religions.yaml @@ -0,0 +1,5 @@ +"dataset_name": "world_religions" +"include": 
"_continuation_template_yaml" +"tag": "mmlu_llama_humanities_tasks" +"task": "mmlu_llama_world_religions" +"task_alias": "world religions" diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/_default_template_yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/_default_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..e959aea405d5b3a149f720f0f0fd53a3696e14c5 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/_default_template_yaml @@ -0,0 +1,34 @@ +dataset_path: TIGER-Lab/MMLU-Pro +output_type: generate_until +test_split: test +fewshot_split: validation +fewshot_config: + sampler: first_n + doc_to_target: !function utils.fewshot_to_text +doc_to_text: "{% set letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' %}Given the following question and candidate answers, choose the best answer.\nQuestion: {{question.strip()}}\n{% for choice in options %}{{letters[loop.index0]}}. {{choice}}\n{% endfor %}\nYour response should end with \"The best answer is [the_answer_letter].\" where the [the_answer_letter] is a letter from the provided choices.\n\nLet's think step by step." +doc_to_target: answer +num_fewshot: 5 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true + regexes_to_ignore: + - "\\$" + - "\\.$" +generation_kwargs: + until: + - "." 
+ max_gen_toks: 1024 +filter_list: + - name: strict_match + filter: + - function: "regex" + regex_pattern: "[tT]he best answer is ([A-Z])" + group_select: -1 + - function: take_first +metadata: + version: 1.0 +dataset_kwargs: + trust_remote_code: true diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/_mmlu_pro.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/_mmlu_pro.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8351c55c7ad7d186675892b770f15e84c2de8716 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/_mmlu_pro.yaml @@ -0,0 +1,23 @@ +group: mmlu_pro_llama +task: + - mmlu_pro_llama_biology + - mmlu_pro_llama_business + - mmlu_pro_llama_chemistry + - mmlu_pro_llama_computer_science + - mmlu_pro_llama_economics + - mmlu_pro_llama_engineering + - mmlu_pro_llama_health + - mmlu_pro_llama_history + - mmlu_pro_llama_law + - mmlu_pro_llama_math + - mmlu_pro_llama_other + - mmlu_pro_llama_philosophy + - mmlu_pro_llama_physics + - mmlu_pro_llama_psychology +aggregate_metric_list: + - aggregation: mean + metric: exact_match + weight_by_size: true + filter_list: custom-extract +metadata: + version: 1.0 diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_biology.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffcbffc8d63f02cd01218dcf88fc980f581bb804 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_biology.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_biology" +task_alias: "biology" +process_docs: !function utils.process_biology diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_business.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_business.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdfe4ff84eb3bc2c2d9c97ef31c6297e60865c47 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_business.yaml @@ -0,0 +1,4 @@ +include: 
"_default_template_yaml" +task: "mmlu_pro_llama_business" +task_alias: "business" +process_docs: !function utils.process_business diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_chemistry.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cbb85149af42a5e0582a4d167ac34eb0e42563f2 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_chemistry.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_chemistry" +task_alias: "chemistry" +process_docs: !function utils.process_chemistry diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_computer_science.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7d1e14442b4cde5119cc827764528666e6834b6 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_computer_science.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_computer_science" +task_alias: "computer_science" +process_docs: !function utils.process_computer_science diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_economics.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_economics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f58272eb8e131773c92a2e0da0b291b040494f7a --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_economics.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_economics" +task_alias: "economics" +process_docs: !function utils.process_economics diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_engineering.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb75ecb2b92fe81ce07be44c9fcb0d0c9d4b3517 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_engineering.yaml @@ -0,0 
+1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_engineering" +task_alias: "engineering" +process_docs: !function utils.process_engineering diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_health.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_health.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c95eba37835f53000a1cba3356c153947651a297 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_health.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_health" +task_alias: "health" +process_docs: !function utils.process_health diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_history.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5dbe3b6831b13887eea8a5ca10a35baeb019a4e8 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_history.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_history" +task_alias: "history" +process_docs: !function utils.process_history diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_law.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3de3b6b77ee7b09abdaffe718bf8afbbe067319 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_law.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_law" +task_alias: "law" +process_docs: !function utils.process_law diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_math.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_math.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d78f4d43634b6d8a3d654d7357ad69c45d9890d --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_math.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_math" +task_alias: "math" 
+process_docs: !function utils.process_math diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_other.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf7910c2e095d398b6954e29044a75b565983bb6 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_other.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_other" +task_alias: "other" +process_docs: !function utils.process_other diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_philosophy.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4bfe8772e0ebf956ad09a999c172e9cf8cb9c178 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_philosophy.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_philosophy" +task_alias: "philosophy" +process_docs: !function utils.process_philosophy diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_physics.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b95a8b1431fe72a20965492bb8dc39108bd55120 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_physics.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_physics" +task_alias: "physics" +process_docs: !function utils.process_physics diff --git a/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_psychology.yaml b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf3ad99828129cf32623855a32c5b7debb24d6f6 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/mmlu_pro_psychology.yaml @@ -0,0 +1,4 @@ +include: "_default_template_yaml" +task: "mmlu_pro_llama_psychology" +task_alias: "psychology" +process_docs: !function utils.process_psychology diff 
--git a/lm_eval/tasks/llama3/instruct/mmlu_pro/utils.py b/lm_eval/tasks/llama3/instruct/mmlu_pro/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4dfc24e03f5b71e64da0e7408dddfdc75749f1c2 --- /dev/null +++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/utils.py @@ -0,0 +1,27 @@ +import re +from functools import partial + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["category"] == subject) + + +def fewshot_to_text(example): + text = example["cot_content"].removeprefix("A: Let's think step by step.").strip() + return re.sub(r"The answer is \(([A-Z])\)\.", r"The best answer is \1.", text) + + +process_biology = partial(process_docs, subject="biology") +process_business = partial(process_docs, subject="business") +process_chemistry = partial(process_docs, subject="chemistry") +process_computer_science = partial(process_docs, subject="computer science") +process_economics = partial(process_docs, subject="economics") +process_engineering = partial(process_docs, subject="engineering") +process_health = partial(process_docs, subject="health") +process_history = partial(process_docs, subject="history") +process_law = partial(process_docs, subject="law") +process_math = partial(process_docs, subject="math") +process_other = partial(process_docs, subject="other") +process_philosophy = partial(process_docs, subject="philosophy") +process_physics = partial(process_docs, subject="physics") +process_psychology = partial(process_docs, subject="psychology") diff --git a/lm_eval/tasks/mbpp/README.md b/lm_eval/tasks/mbpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fd6df44fb76a1a9e017d60afc470200967696f19 --- /dev/null +++ b/lm_eval/tasks/mbpp/README.md @@ -0,0 +1,43 @@ +# MBPP + +## Paper +Program Synthesis with Large Language Models +https://arxiv.org/abs/2108.07732 + +This paper explores the limits of the current generation of large language models for program synthesis in general purpose 
programming languages. We evaluate a collection of such models (with between 244M and 137B parameters) on two new benchmarks, MBPP and MathQA-Python, in both the few-shot and fine-tuning regimes. Our benchmarks are designed to measure the ability of these models to synthesize short Python programs from natural language descriptions. The Mostly Basic Programming Problems (MBPP) dataset contains 974 programming tasks, designed to be solvable by entry-level programmers. The MathQA-Python dataset, a Python version of the MathQA benchmark, contains 23914 problems that evaluate the ability of the models to synthesize code from more complex text. On both datasets, we find that synthesis performance scales log-linearly with model size. Our largest models, even without finetuning on a code dataset, can synthesize solutions to 59.6 percent of the problems from MBPP using few-shot learning with a well-designed prompt. Fine-tuning on a held-out portion of the dataset improves performance by about 10 percentage points across most model sizes. On the MathQA-Python dataset, the largest fine-tuned model achieves 83.8 percent accuracy. Going further, we study the model's ability to engage in dialog about code, incorporating human feedback to improve its solutions. We find that natural language feedback from a human halves the error rate compared to the model's initial prediction. Additionally, we conduct an error analysis to shed light on where these models fall short and what types of programs are most difficult to generate. Finally, we explore the semantic grounding of these models by fine-tuning them to predict the results of program execution. We find that even our best models are generally unable to predict the output of a program given a specific input. 
+ +Homepage: https://github.com/google-research/google-research/tree/master/mbpp + + +## Citation +``` +@article{austin2021program, + title={Program synthesis with large language models}, + author={Austin, Jacob and Odena, Augustus and Nye, Maxwell and Bosma, Maarten and Michalewski, Henryk and Dohan, David and Jiang, Ellen and Cai, Carrie and Terry, Michael and Le, Quoc and others}, + journal={arXiv preprint arXiv:2108.07732}, + year={2021} +} +``` + +### Groups and Tasks + +#### Groups + +* Not part of a group yet. + +#### Tasks + +- `mbpp` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [x] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? 
diff --git a/lm_eval/tasks/mbpp/mbpp.yaml b/lm_eval/tasks/mbpp/mbpp.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5b58d900186cb127b26e3bd20f31d434abdeb2a --- /dev/null +++ b/lm_eval/tasks/mbpp/mbpp.yaml @@ -0,0 +1,23 @@ +task: mbpp +dataset_path: google-research-datasets/mbpp +dataset_name: full +unsafe_code: true +output_type: generate_until +test_split: test +doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]\n" +doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}" +target_delimiter: "" +metric_list: + - metric: !function utils.pass_at_1 + aggregation: mean + higher_is_better: true +generation_kwargs: + until: + - "[DONE]" + do_sample: false +num_fewshot: 3 +fewshot_config: + sampler: first_n + samples: !function utils.list_fewshot_samples +metadata: + version: 1.0 diff --git a/lm_eval/tasks/mbpp/utils.py b/lm_eval/tasks/mbpp/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2d94b51275be531b311a0b2b9285ea876fe7682a --- /dev/null +++ b/lm_eval/tasks/mbpp/utils.py @@ -0,0 +1,58 @@ +import evaluate as hf_evaluate + + +try: + pass_at_k = hf_evaluate.load("code_eval") + + # run simple test to check code execution is enabled before model generation + test_cases = ["assert add(2, 3)==5"] + candidates = [["def add(a,b): return a*b"]] + results = pass_at_k.compute(references=test_cases, predictions=candidates, k=[1]) +except Exception as e: + raise e + + +def pass_at_1(references, predictions): + return pass_at_k.compute( + references=references, + predictions=[predictions], + k=[1], + )[0]["pass@1"] + + +def list_fewshot_samples(): + return [ + { + "task_id": 2, + "text": "Write a function to find the similar elements from the given two tuple lists.", + "code": "def similar_elements(test_tup1, 
test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res) ", + "test_list": [ + "assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)", + "assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4)", + "assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14)", + ], + "is_fewshot": True, + }, + { + "task_id": 3, + "text": "Write a python function to identify non-prime numbers.", + "code": "import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result", + "test_list": [ + "assert is_not_prime(2) == False", + "assert is_not_prime(10) == True", + "assert is_not_prime(35) == True", + ], + "is_fewshot": True, + }, + { + "task_id": 4, + "text": "Write a function to find the largest integers from a given list of numbers using heap queue algorithm.", + "code": "import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums", + "test_list": [ + "assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] ", + "assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] ", + "assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35]", + ], + "is_fewshot": True, + }, + ] diff --git a/lm_eval/tasks/mgsm/README.md b/lm_eval/tasks/mgsm/README.md index 90f8e44bb05394cb95c121946febbaaad6c48d27..3b62edf136e2e1482b4da45febf786b8f4fe4c7c 100644 --- a/lm_eval/tasks/mgsm/README.md +++ b/lm_eval/tasks/mgsm/README.md @@ -92,3 +92,7 @@ If other tasks on this dataset are already supported: * [ ] Is the "Main" variant of this task clearly denoted? * [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? * [ ] Have you noted which, if any, published evaluation setups are matched by this variant? 
+ +# changelog +- (en_cot, direct) ver 3; (native_cot) ver 4: issue #2578; PR #2587 + - fix fewshot format: Changed inconsistent usage of ':' (ASCII) and ':' (Chinese) to use ':' consistently. diff --git a/lm_eval/tasks/mgsm/direct/direct_yaml b/lm_eval/tasks/mgsm/direct/direct_yaml index d2e301ba27abf703bff771bd4b79fa2448d188c1..3dd83c0c92bad498e6b83f503ef136766d550223 100644 --- a/lm_eval/tasks/mgsm/direct/direct_yaml +++ b/lm_eval/tasks/mgsm/direct/direct_yaml @@ -32,4 +32,4 @@ metric_list: ignore_case: true ignore_punctuation: true metadata: - version: 2.0 + version: 3.0 diff --git a/lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml b/lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml index 7de11a486d4c5eaf7a2675fec8c9812f7beae0c0..b9a1ce2bb1e9f07cdada7f95dd153fa31e70daf8 100644 --- a/lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml +++ b/lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml @@ -1,11 +1,11 @@ # Generated by utils.py dataset_name: ja doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}' -doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"問題: "+question+"\nAnswer:"}}{% endif %}' +doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"問題: "+question+"\nAnswer:"}}{% endif %}' generation_kwargs: do_sample: false until: - - '問題:' + - 問題: - - <|im_end|> include: direct_yaml diff --git a/lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml b/lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml index 283e63f8bcd9f910ea9aa7560ed1c68819c0351a..462a92c36a7b5d42b0d964538ac3a2a44bfb1c6a 100644 --- a/lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml +++ b/lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml @@ -1,11 +1,11 @@ # Generated by utils.py dataset_name: zh doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}' -doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"问题: "+question+"\nAnswer:"}}{% endif %}' +doc_to_text: '{%
if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"问题: "+question+"\nAnswer:"}}{% endif %}' generation_kwargs: do_sample: false until: - - '问题:' + - 问题: - - <|im_end|> include: direct_yaml diff --git a/lm_eval/tasks/mgsm/en_cot/cot_yaml b/lm_eval/tasks/mgsm/en_cot/cot_yaml index b53ae970800e31ff95815f0410407e90ea6afe30..6f3fabaa767d67cc2d36e700e3f0081f3d5cde9d 100644 --- a/lm_eval/tasks/mgsm/en_cot/cot_yaml +++ b/lm_eval/tasks/mgsm/en_cot/cot_yaml @@ -33,4 +33,4 @@ filter_list: - function: take_first name: flexible-extract metadata: - version: 2.0 + version: 3.0 diff --git a/lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ja.yaml b/lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ja.yaml index c98060357ebd1ed60b61555c954a035b9e0080f6..fb324970fbba46b6b77c2a6fb397ad5bc60e5816 100644 --- a/lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ja.yaml +++ b/lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ja.yaml @@ -1,11 +1,11 @@ # Generated by utils.py dataset_name: ja doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}' -doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"問題: "+question+"\nStep-by-Step Answer:"}}{% endif %}' +doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"問題: "+question+"\nStep-by-Step Answer:"}}{% endif %}' generation_kwargs: do_sample: false until: - - '問題:' + - 問題: - - <|im_end|> include: cot_yaml diff --git a/lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml b/lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml index f45004aacfd93bc4786b9ebd42cc6283d9a31785..ebc822d698cdacc90075b034ee37a44fd4027bed 100644 --- a/lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml +++ b/lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml @@ -1,11 +1,11 @@ # Generated by utils.py dataset_name: zh doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}' -doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else 
%}{{"问题: "+question+"\nStep-by-Step Answer:"}}{% endif %}' +doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"问题: "+question+"\nStep-by-Step Answer:"}}{% endif %}' generation_kwargs: do_sample: false until: - - '问题:' + - 问题: - - <|im_end|> include: cot_yaml diff --git a/lm_eval/tasks/mgsm/native_cot/cot_yaml b/lm_eval/tasks/mgsm/native_cot/cot_yaml index eb058ca42aca61df8d180e25879d39d44985f9eb..80e5f443e499e8534a56682e3eb20e692e622d00 100644 --- a/lm_eval/tasks/mgsm/native_cot/cot_yaml +++ b/lm_eval/tasks/mgsm/native_cot/cot_yaml @@ -28,4 +28,4 @@ filter_list: regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)" - function: "take_first" metadata: - version: 3.0 + version: 4.0 diff --git a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml index 8e56bd0b15150e1e435b4d304255c0a751246e86..3715aca53b7bd9d4af3c95a95fa5ebdd8e7e000e 100644 --- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml +++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml @@ -1,7 +1,7 @@ # Generated by utils.py dataset_name: ja doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}' -doc_to_text: '{% if answer is not none %}{{question+"\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\nステップごとの答え:"}}{% endif %}' +doc_to_text: '{% if answer is not none %}{{question+"\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\nステップごとの答え:"}}{% endif %}' filter_list: - filter: - function: regex @@ -17,7 +17,7 @@ filter_list: generation_kwargs: do_sample: false until: - - '問題:' + - 問題: - - <|im_end|> include: cot_yaml diff --git a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml index 3f0d7e2dcecaecee05671a636b0a3e27eeeee95e..2b45170ca33e27ad4208a876a24a3f8f2373676f 100644 --- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml +++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml @@ -1,7 +1,7 @@ # 
Generated by utils.py dataset_name: zh doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}' -doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{% endif %}' +doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{% endif %}' filter_list: - filter: - function: regex @@ -17,7 +17,7 @@ filter_list: generation_kwargs: do_sample: false until: - - '问题:' + - 问题: - - <|im_end|> include: cot_yaml diff --git a/lm_eval/tasks/mgsm/utils.py b/lm_eval/tasks/mgsm/utils.py index 116214f9f4c45ffb9a04757ca41c58114180b259..54e39af9b90ae0a7da777da3ca0524467942b58e 100644 --- a/lm_eval/tasks/mgsm/utils.py +++ b/lm_eval/tasks/mgsm/utils.py @@ -75,7 +75,7 @@ LANGUAGES = { }, "ja": { # Japanese # "QUESTION": "問題:", - "QUESTION": "\u554f\u984c:", + "QUESTION": "\u554f\u984c:", # "ANSWER": "ステップごとの答え:", "ANSWER": "\u30b9\u30c6\u30c3\u30d7\u3054\u3068\u306e\u7b54\u3048:", "DIRECT": "Answer:", @@ -84,7 +84,7 @@ LANGUAGES = { }, "zh": { # Chinese # "QUESTION": "问题:", - "QUESTION": "\u95ee\u9898:", + "QUESTION": "\u95ee\u9898:", # "ANSWER": "逐步解答:", "ANSWER": "\u9010\u6b65\u89e3\u7b54:", "DIRECT": "Answer:", diff --git a/lm_eval/tasks/mlqa/README.md b/lm_eval/tasks/mlqa/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3d82f95ff05e8ce7dbd71ba2e36f997dad92def0 --- /dev/null +++ b/lm_eval/tasks/mlqa/README.md @@ -0,0 +1,101 @@ +# MLQA + +### Paper + +Title: `MLQA: Evaluating Cross-lingual Extractive Question Answering` + +Abstract: `https://arxiv.org/abs/1910.07475` + +MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance. +MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic, +German, Spanish, Hindi, Vietnamese and Simplified Chinese. 
MLQA is highly parallel, with QA instances parallel between +4 different languages on average + +Homepage: `https://github.com/facebookresearch/MLQA` + + +### Citation + +``` +@misc{lewis2020mlqaevaluatingcrosslingualextractive, + title={MLQA: Evaluating Cross-lingual Extractive Question Answering}, + author={Patrick Lewis and Barlas Oğuz and Ruty Rinott and Sebastian Riedel and Holger Schwenk}, + year={2020}, + eprint={1910.07475}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/1910.07475}, +} +``` + +### Groups, Tags, and Tasks + +#### Groups + +* Not part of a group yet + +#### Tasks + +Tasks of the form `mlqa_context-lang_question-lang.yaml` +* `mlqa_ar_ar.yaml` +* `mlqa_ar_de.yaml` +* `mlqa_ar_vi.yaml` +* `mlqa_ar_zh.yaml` +* `mlqa_ar_en.yaml` +* `mlqa_ar_es.yaml` +* `mlqa_ar_hi.yaml` +* `mlqa_de_ar.yaml` +* `mlqa_de_de.yaml` +* `mlqa_de_vi.yaml` +* `mlqa_de_zh.yaml` +* `mlqa_de_en.yaml` +* `mlqa_de_es.yaml` +* `mlqa_de_hi.yaml` +* `mlqa_vi_ar.yaml` +* `mlqa_vi_de.yaml` +* `mlqa_vi_vi.yaml` +* `mlqa_vi_zh.yaml` +* `mlqa_vi_en.yaml` +* `mlqa_vi_es.yaml` +* `mlqa_vi_hi.yaml` +* `mlqa_zh_ar.yaml` +* `mlqa_zh_de.yaml` +* `mlqa_zh_vi.yaml` +* `mlqa_zh_zh.yaml` +* `mlqa_zh_en.yaml` +* `mlqa_zh_es.yaml` +* `mlqa_zh_hi.yaml` +* `mlqa_en_ar.yaml` +* `mlqa_en_de.yaml` +* `mlqa_en_vi.yaml` +* `mlqa_en_zh.yaml` +* `mlqa_en_en.yaml` +* `mlqa_en_es.yaml` +* `mlqa_en_hi.yaml` +* `mlqa_es_ar.yaml` +* `mlqa_es_de.yaml` +* `mlqa_es_vi.yaml` +* `mlqa_es_zh.yaml` +* `mlqa_es_en.yaml` +* `mlqa_es_es.yaml` +* `mlqa_es_hi.yaml` +* `mlqa_hi_ar.yaml` +* `mlqa_hi_de.yaml` +* `mlqa_hi_vi.yaml` +* `mlqa_hi_zh.yaml` +* `mlqa_hi_en.yaml` +* `mlqa_hi_es.yaml` +* `mlqa_hi_hi.yaml` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [x] Have you referenced the original paper that introduced the task? + * [x] If yes, does the original paper provide a reference implementation? 
If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? diff --git a/lm_eval/tasks/mlqa/generate_tasks.py b/lm_eval/tasks/mlqa/generate_tasks.py new file mode 100644 index 0000000000000000000000000000000000000000..19bd3533af6c97132ec8fea3ea94997530378e66 --- /dev/null +++ b/lm_eval/tasks/mlqa/generate_tasks.py @@ -0,0 +1,48 @@ +# ruff: noqa: E731, E741 +""" +Script to generate task YAMLs for the mlqa dataset. +Based on `tasks/bigbench/generate_tasks.py`. +""" + +from datasets import get_dataset_config_names + + +chosen_subtasks = [] + +language_dict = { + "en": "english", + "es": "spanish", + "hi": "hindi", + "vi": "vietnamese", + "de": "german", + "ar": "arabic", + "zh": "chinese", +} + + +def main() -> None: + configs = get_dataset_config_names("facebook/mlqa", trust_remote_code=True) + for config in configs: + if len(config.split(".")) == 2: + continue + else: + chosen_subtasks.append(config) + assert len(chosen_subtasks) == 49 + for task in chosen_subtasks: + file_name = f"{task.replace('.', '_')}.yaml" + context_lang = file_name.split("_")[1] + # Not using yaml to avoid tagging issues with !function + with open(file_name, "w", encoding="utf-8") as f: + f.write("# Generated by generate_tasks.py\n") + + # Manually writing the YAML-like content inside files to avoid tagging issues + f.write("include: mlqa_common_yaml\n") + f.write(f"task: {task.replace('.', '_')}\n") + f.write(f"dataset_name: {task}\n") + f.write( + f"process_results: !function utils.process_results_{context_lang}\n" + ) + + +if __name__ == "__main__": + main() diff --git a/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml b/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..8db625acce6d92b58dc601725da7bedb3e5e76ea --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_ar_ar +dataset_name: mlqa.ar.ar +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_de.yaml b/lm_eval/tasks/mlqa/mlqa_ar_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d1468a7bd82ae9d682766042e99069ed6ed92a7 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_ar_de +dataset_name: mlqa.ar.de +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_en.yaml b/lm_eval/tasks/mlqa/mlqa_ar_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18e763e8ac464ad922fc228d2449be4ad20568d9 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_ar_en +dataset_name: mlqa.ar.en +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_es.yaml b/lm_eval/tasks/mlqa/mlqa_ar_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c93ef03ec0a7fff3090b5e9f269b97c7de8a35cc --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_ar_es +dataset_name: mlqa.ar.es +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml b/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5abb023ccdaf5912451f1093d4a1c9295902d6e3 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_ar_hi +dataset_name: mlqa.ar.hi +process_results: 
!function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml b/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54869c657d682971518fc12bb83f24f8389e46c9 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_ar_vi +dataset_name: mlqa.ar.vi +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml b/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5236d6cb873fa95582ac4bcc3fd95f940323a188 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_ar_zh +dataset_name: mlqa.ar.zh +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_common_yaml b/lm_eval/tasks/mlqa/mlqa_common_yaml new file mode 100644 index 0000000000000000000000000000000000000000..c52ecb8914a7ddb24a838ff0570599ff43f98836 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_common_yaml @@ -0,0 +1,22 @@ +dataset_path: facebook/mlqa +dataset_kwargs: + trust_remote_code: true +test_split: test +validation_split: validation +output_type: generate_until +doc_to_text: "Context: {{context}}\n\nQuestion: {{question}}\n\nAnswer:" +doc_to_target: "{{answers}}" +process_docs: !function utils.process_docs +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true + - metric: f1 + aggregation: mean + higher_is_better: true +generation_kwargs: + until: + - "\n" + do_sample: false +metadata: + version: 0.0 diff --git a/lm_eval/tasks/mlqa/mlqa_de_ar.yaml b/lm_eval/tasks/mlqa/mlqa_de_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1090a58925ed033bc18f5546e2b8d93619992b3c --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: 
mlqa_common_yaml +task: mlqa_de_ar +dataset_name: mlqa.de.ar +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_de.yaml b/lm_eval/tasks/mlqa/mlqa_de_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be465ab57a4073d29e092178403e452df122eb2e --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_de_de +dataset_name: mlqa.de.de +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_en.yaml b/lm_eval/tasks/mlqa/mlqa_de_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55f2652ce48613d17534020ec5e0e452812ec5dd --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_de_en +dataset_name: mlqa.de.en +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_es.yaml b/lm_eval/tasks/mlqa/mlqa_de_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4f085e6241482f35c647a3b1398f2913b4a5a53 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_de_es +dataset_name: mlqa.de.es +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_hi.yaml b/lm_eval/tasks/mlqa/mlqa_de_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff3bbc428634ad0afaac7dc44f1911acf1258fe7 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_de_hi +dataset_name: mlqa.de.hi +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_vi.yaml b/lm_eval/tasks/mlqa/mlqa_de_vi.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..fe61983b70ce347d774cff2d20dee1c8afb1a019 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_de_vi +dataset_name: mlqa.de.vi +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_zh.yaml b/lm_eval/tasks/mlqa/mlqa_de_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee1855626fdf5ab157894dfd5636fe7b2fc58739 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_de_zh +dataset_name: mlqa.de.zh +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_en_ar.yaml b/lm_eval/tasks/mlqa/mlqa_en_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8c72d2694351760a9c9f9b6332615238af2e125 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_en_ar +dataset_name: mlqa.en.ar +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_de.yaml b/lm_eval/tasks/mlqa/mlqa_en_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b27e02ae6c33115ccd6f69c0487332468614d9c3 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_en_de +dataset_name: mlqa.en.de +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_en.yaml b/lm_eval/tasks/mlqa/mlqa_en_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d15e222f7bf8ec0f09526e12ecac6788c1174568 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_en_en +dataset_name: mlqa.en.en +process_results: !function 
utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_es.yaml b/lm_eval/tasks/mlqa/mlqa_en_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eddb728f02529dcaaf05ef1c57920ff36e254150 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_en_es +dataset_name: mlqa.en.es +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_hi.yaml b/lm_eval/tasks/mlqa/mlqa_en_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c2e38249a012568baddc13b961e11a7174a2555 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_en_hi +dataset_name: mlqa.en.hi +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_vi.yaml b/lm_eval/tasks/mlqa/mlqa_en_vi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a2f635ea385f0dbfff79e470f066b8f260aa52f --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_en_vi +dataset_name: mlqa.en.vi +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_zh.yaml b/lm_eval/tasks/mlqa/mlqa_en_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91336eba9a7fd8e33fe7845c27b2b21f88d9177e --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_en_zh +dataset_name: mlqa.en.zh +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_es_ar.yaml b/lm_eval/tasks/mlqa/mlqa_es_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a24508cbd630e5f1fd25ed070821051214b0a72 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_ar.yaml @@ -0,0 +1,5 @@ +# Generated 
by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_es_ar +dataset_name: mlqa.es.ar +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_de.yaml b/lm_eval/tasks/mlqa/mlqa_es_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a40b2b6956b91b39aa8a7d16d98ef91098f3665 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_es_de +dataset_name: mlqa.es.de +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_en.yaml b/lm_eval/tasks/mlqa/mlqa_es_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..660968c7fd9131cb54ba23be4eaf51e0ed68ff35 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_es_en +dataset_name: mlqa.es.en +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_es.yaml b/lm_eval/tasks/mlqa/mlqa_es_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1232947b92715bf3b714ee3a9aa1f525a532bc68 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_es_es +dataset_name: mlqa.es.es +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_hi.yaml b/lm_eval/tasks/mlqa/mlqa_es_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5502288925c25e2fda50ed630abd292989464e93 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_es_hi +dataset_name: mlqa.es.hi +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_vi.yaml b/lm_eval/tasks/mlqa/mlqa_es_vi.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..0ea9027dec2a1cee00d5e145f8ce68c02ccb9f4d --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_es_vi +dataset_name: mlqa.es.vi +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_zh.yaml b/lm_eval/tasks/mlqa/mlqa_es_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..caecd1b2d0d8c1600596cfb9ed844e044f0eefdf --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_es_zh +dataset_name: mlqa.es.zh +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml b/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4c4263a1d4c0d326a6fabfb5d4036037c152d75 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_hi_ar +dataset_name: mlqa.hi.ar +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_de.yaml b/lm_eval/tasks/mlqa/mlqa_hi_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8069b5a07b2c63f713bcea262f9d5209545507b1 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_hi_de +dataset_name: mlqa.hi.de +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_en.yaml b/lm_eval/tasks/mlqa/mlqa_hi_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7a18067bc0a568003f54317d2b9a44cc1770b2c --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_hi_en +dataset_name: mlqa.hi.en +process_results: !function 
utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_es.yaml b/lm_eval/tasks/mlqa/mlqa_hi_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d152ad66dc3e497c5bc5aa1094a1c0f97cdf2bae --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_hi_es +dataset_name: mlqa.hi.es +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml b/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ce79e6bbe5be2d476fdbbdf914107be31e2efea --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_hi_hi +dataset_name: mlqa.hi.hi +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml b/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..534d90f70dd08313f6f9a0cb68f5571f689e8569 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_hi_vi +dataset_name: mlqa.hi.vi +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml b/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8432db492dfc3c593adbf7255ead41c0a36be8b7 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_hi_zh +dataset_name: mlqa.hi.zh +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml b/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c22c11cd0613c88743f296fce025ae774be31a5a --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml @@ -0,0 +1,5 @@ +# Generated 
by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_vi_ar +dataset_name: mlqa.vi.ar +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_de.yaml b/lm_eval/tasks/mlqa/mlqa_vi_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..948ac3ac36637dfd300e0b0c74ca8a9c1a7bb1e8 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_vi_de +dataset_name: mlqa.vi.de +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_en.yaml b/lm_eval/tasks/mlqa/mlqa_vi_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0106867703a5bcb7a447d942f8f8cf71269b4820 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_vi_en +dataset_name: mlqa.vi.en +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_es.yaml b/lm_eval/tasks/mlqa/mlqa_vi_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ac62c1056237afcb2d9e921d6b319903ae2af25 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_vi_es +dataset_name: mlqa.vi.es +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml b/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..26b232a879778b6d7eee8bedfdbe7e758acba9aa --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_vi_hi +dataset_name: mlqa.vi.hi +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml b/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..d8277d78ebc4ddd57ab2fe955adc80259e85902c --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_vi_vi +dataset_name: mlqa.vi.vi +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml b/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ecc6b919281cd13b103a1564199b2ddf1db622b --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_vi_zh +dataset_name: mlqa.vi.zh +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml b/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42c3713d5a156ed1c44afd3af08624b8d4bc75aa --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_zh_ar +dataset_name: mlqa.zh.ar +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_de.yaml b/lm_eval/tasks/mlqa/mlqa_zh_de.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb5e4cb884a4bfcc25f1f5323514cfb49807654f --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_zh_de +dataset_name: mlqa.zh.de +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_en.yaml b/lm_eval/tasks/mlqa/mlqa_zh_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..653f26aefa5d02f0250277e004333c1f1499b9fb --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_zh_en +dataset_name: mlqa.zh.en +process_results: !function 
utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_es.yaml b/lm_eval/tasks/mlqa/mlqa_zh_es.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c98203f76f6b5250c4df09b8b8b73ae7bb9964f4 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_zh_es +dataset_name: mlqa.zh.es +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml b/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed58f47f4d52f8a3ba1c9240bca15ac97f113874 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_zh_hi +dataset_name: mlqa.zh.hi +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml b/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7043676235f913e7826751503bb7146ae5a4b5fe --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_zh_vi +dataset_name: mlqa.zh.vi +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml b/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..792b5ee0c9ba13583de4914ceba5c76a941361a7 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml +task: mlqa_zh_zh +dataset_name: mlqa.zh.zh +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/utils.py b/lm_eval/tasks/mlqa/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..61e593716a968af4240e43024dfa90c8f0e0a53c --- /dev/null +++ b/lm_eval/tasks/mlqa/utils.py @@ -0,0 +1,165 @@ +""" +Code based on Official 
evaluation script for the MLQA dataset. +Repo: https://github.com/facebookresearch/MLQA/blob/main/mlqa_evaluation_v1.py +""" + +import re +import string +import sys +import unicodedata +from collections import Counter + +import datasets + + +PUNCT = { + chr(i) + for i in range(sys.maxunicode) + if unicodedata.category(chr(i)).startswith("P") +}.union(string.punctuation) +WHITESPACE_LANGS = ["en", "es", "hi", "vi", "de", "ar"] +MIXED_SEGMENTATION_LANGS = ["zh"] + + +def whitespace_tokenize(text): + return text.split() + + +def mixed_segmentation(text): + segs_out = [] + temp_str = "" + for char in text: + if re.search(r"[\u4e00-\u9fa5]", char) or char in PUNCT: + if temp_str != "": + ss = whitespace_tokenize(temp_str) + segs_out.extend(ss) + temp_str = "" + segs_out.append(char) + else: + temp_str += char + + if temp_str != "": + ss = whitespace_tokenize(temp_str) + segs_out.extend(ss) + + return segs_out + + +def normalize_answer(s, lang): + """Lower text and remove punctuation, articles and extra whitespace.""" + + def remove_articles(text, lang): + if lang == "en": + return re.sub(r"\b(a|an|the)\b", " ", text) + elif lang == "es": + return re.sub(r"\b(un|una|unos|unas|el|la|los|las)\b", " ", text) + elif lang == "hi": + return text # Hindi does not have formal articles + elif lang == "vi": + return re.sub(r"\b(của|là|cái|chiếc|những)\b", " ", text) + elif lang == "de": + return re.sub( + r"\b(ein|eine|einen|einem|eines|einer|der|die|das|den|dem|des)\b", + " ", + text, + ) + elif lang == "ar": + return re.sub(r"\sال^|ال", " ", text) + elif lang == "zh": + return text # Chinese does not have formal articles + else: + raise Exception("Unknown Language {}".format(lang)) + + def white_space_fix(text, lang): + if lang in WHITESPACE_LANGS: + tokens = whitespace_tokenize(text) + elif lang in MIXED_SEGMENTATION_LANGS: + tokens = mixed_segmentation(text) + else: + raise Exception("Unknown Language {}".format(lang)) + return " ".join([t for t in tokens if t.strip() != ""]) 
+ + def remove_punc(text): + return "".join(ch for ch in text if ch not in PUNCT) + + def lower(text): + return text.lower() + + return white_space_fix(remove_articles(remove_punc(lower(s)), lang), lang) + + +def f1_score(prediction, ground_truth, lang): + prediction_tokens = normalize_answer(prediction, lang).split() + ground_truth_tokens = normalize_answer(ground_truth, lang).split() + common = Counter(prediction_tokens) & Counter(ground_truth_tokens) + num_same = sum(common.values()) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(prediction_tokens) + recall = 1.0 * num_same / len(ground_truth_tokens) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + + +def exact_match_score(prediction, ground_truth, lang): + return normalize_answer(prediction, lang) == normalize_answer(ground_truth, lang) + + +def metric_max_over_ground_truths(metric_fn, prediction, ground_truths, lang): + scores_for_ground_truths = [] + for ground_truth in ground_truths: + score = metric_fn(prediction, ground_truth, lang) + scores_for_ground_truths.append(score) + return max(scores_for_ground_truths) + + +def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: + def _process_doc(doc): + out_doc = { + "context": doc["context"], + "question": doc["question"], + "answers": doc["answers"]["text"], + } + return out_doc + + return dataset.map(_process_doc) + + +# Base function +def process_results_lang(doc, results, lang): + ground_truths = doc["answers"] + prediction = results[0].strip() + exact_match = metric_max_over_ground_truths( + exact_match_score, prediction, ground_truths, lang + ) + f1 = metric_max_over_ground_truths(f1_score, prediction, ground_truths, lang) + return {"exact_match": exact_match, "f1": f1} + + +# Language Wrapper functions +def process_results_en(doc, results): + return process_results_lang(doc, results, "en") + + +def process_results_es(doc, results): + return process_results_lang(doc, results, "es") + + +def 
process_results_hi(doc, results): + return process_results_lang(doc, results, "hi") + + +def process_results_vi(doc, results): + return process_results_lang(doc, results, "vi") + + +def process_results_de(doc, results): + return process_results_lang(doc, results, "de") + + +def process_results_ar(doc, results): + return process_results_lang(doc, results, "ar") + + +def process_results_zh(doc, results): + return process_results_lang(doc, results, "zh") diff --git a/lm_eval/tasks/mmlu/_generate_configs.py b/lm_eval/tasks/mmlu/_generate_configs.py index 28b94616ddec7d0690be3d20c830990462f0dde6..58876d4c10610ddfcd68a3a30ea805360b124ce1 100644 --- a/lm_eval/tasks/mmlu/_generate_configs.py +++ b/lm_eval/tasks/mmlu/_generate_configs.py @@ -1,3 +1,4 @@ +# noqa """ Take in a YAML, and output all "other" splits with this YAML """ diff --git a/lm_eval/tasks/mmlu/generative/_default_template_yaml b/lm_eval/tasks/mmlu/generative/_default_template_yaml index 1452e0f5b34a899e537ad8889bf012c403ff58cc..7281f0a1e06ad370e2bf4933816b2724f2b55541 100644 --- a/lm_eval/tasks/mmlu/generative/_default_template_yaml +++ b/lm_eval/tasks/mmlu/generative/_default_template_yaml @@ -14,7 +14,21 @@ metric_list: - metric: exact_match aggregation: mean higher_is_better: true + ignore_punctuation: true + ignore_case: true +filter_list: + - name: get_response + filter: + # Filter everything after the first break line + - function: "regex" + regex_pattern: "^(.*?)(?=\\n|$)" + # Remove leading white spaces + - function: remove_whitespace + # function to ignore right white spaces or line breaks + - function: "regex" + regex_pattern: "^(.*?)\\s*$" + - function: take_first metadata: - version: 2.0 + version: 3.0 dataset_kwargs: trust_remote_code: true diff --git a/lm_eval/tasks/mmlu/generative/_mmlu.yaml b/lm_eval/tasks/mmlu/generative/_mmlu.yaml index 1a63611bdb2d35ef4fb358a5258187c1cab99a65..e4f4b5d5a81865d57140434e9cd589b88bb39096 100644 --- a/lm_eval/tasks/mmlu/generative/_mmlu.yaml +++ 
b/lm_eval/tasks/mmlu/generative/_mmlu.yaml @@ -5,29 +5,29 @@ task: task: - mmlu_stem_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true - group: other task: - mmlu_other_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true - group: social sciences task: - mmlu_social_sciences_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true - group: humanities task: - mmlu_humanities_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true aggregate_metric_list: - aggregation: mean metric: exact_match - weight_by_size: True + weight_by_size: true metadata: - version: 2 + version: 3 diff --git a/lm_eval/tasks/model_written_evals/winogenerated/winogenerated.yaml b/lm_eval/tasks/model_written_evals/winogenerated/winogenerated similarity index 100% rename from lm_eval/tasks/model_written_evals/winogenerated/winogenerated.yaml rename to lm_eval/tasks/model_written_evals/winogenerated/winogenerated diff --git a/lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py b/lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py index 677e6bb41dcbdc442ffa42c1d349c70eb9afb0d9..a185c74411eb0a4d738cb443d4eacee3de2eac7f 100644 --- a/lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py +++ b/lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py @@ -258,7 +258,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/score/NON_GREEDY.md b/lm_eval/tasks/score/NON_GREEDY.md new file mode 100644 index 
0000000000000000000000000000000000000000..41da5d3d1b6448c213bdb00ec20043d75702f60b --- /dev/null +++ b/lm_eval/tasks/score/NON_GREEDY.md @@ -0,0 +1,45 @@ +``` +Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +``` +# Non Greedy Evaluation + +This task checks the model's consistency with respect to seed changes during generation. +More specifically, it evaluates the model's accuracy and consistency rate with 5 +different seeds (seed = 1, 2,...,5) for a fixed prompt with temperature set to 0.7. + +## How to run the Non-Greedy evaluation of SCORE? + +Evaluation for non greedy tasks differs a bit from other score tasks as it is required to pass different seeds as an argument manually. Below you can find the step-by-step guide on how to correctly run the **Score Non-Greedy** evaluation. + +To run the evaluation of the Non-Greedy tasks with 5 different seeds you should: +1. For a given dataset run the evaluation by + * specifying the task as `score_non_greedy_robustness_{DATASET_NAME}` (`DATASET_NAME` being either `agieval`, `mmlu_pro` or `math`) + * fixing the seed with the run argument `--seed=1` + * passing the `--log_samples` argument* + * specifying an output with `--output_path=SOME_OUTPUT_PATH/seed_1` + * if running with vllm it is important to set the seed in the `--model_args` just by specifying the `seed` parameter\ + +2. 
Repeat the process 5 times**, changing the `--seed` and the `--output_path` arguments accordingly from 1 to 5. + +3. When all 5 runs are finished and logs are saved, run the `./lm_eval/tasks/score/non_greedy_summarizer.py` script by passing the output directory of the above runs to the `--log_dir` argument***, and by specifying the dataset name for which the evaluations were run with the `--dataset` argument (`agieval`, `mmlu_pro` or `math`). \ + +4. The script will return the default lm_evaluation_harness table where accuracies for each seed and the consistency rate are calculated. + + +\* _As this evaluation requires `--log_samples` to be True, it will need some extra disk space to save the prediction results for each seed._ + +\*\* _Refer to [`./lm_eval/tasks/score/non_greedy.sh`](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/score/non_greedy.sh) to see an example of non greedy evaluation command for each seed._ + +\*\*\* _The `--log_dir` argument should be given the path of the parent folder of the `"seed_1", "seed_2", ...` directories, which is not necessarily the `--output_path` passed to the evaluator in the 1st step._ diff --git a/lm_eval/tasks/score/README.md b/lm_eval/tasks/score/README.md index 4055d5f76c2d1c535e2fc11058862f1418548427..a0bf7d9268cee6f3308c701b73c058ce47b57c4a 100644 --- a/lm_eval/tasks/score/README.md +++ b/lm_eval/tasks/score/README.md @@ -31,7 +31,7 @@ limitations under the License. 
## Tasks -Both `score_robustness_mmlu_pro` and `score_robustness_agieval` contain the following 2 tasks: +Both `score_robustness_mmlu_pro` and `score_robustness_agieval` contain the following 3 tasks: * Option order robustness: `score_option_order_robustness_mmlu_pro`, @@ -41,10 +41,14 @@ Both `score_robustness_mmlu_pro` and `score_robustness_agieval` contain the foll `score_prompt_robustness_mmlu_pro`, `score_prompt_robustness_agieval`, -Whereas math contains only +* Non greedy robustness: +`score_non_greedy_robustness_mmlu_pro`, +`score_non_greedy_robustness_agieval`, + +Whereas math contains the following 2: * Prompt robustness: `score_prompt_robustness_math` - +* Non greedy robustness: `score_non_greedy_robustness_math` ### Option order robustness @@ -55,6 +59,10 @@ Measures the model's robustness to the placement of the correct answer in the op Measures the model's robustness to 10 different prompts. list of the prompts can be found in the `./prompt_templates.json` file under the key `prompt_robustness`. +### Non greedy robustness + +Measures the model's robustness to 5 different seeds: seeds = \[1-5\]. To evaluate on the non greedy task, please refer to [NON_GREEDY.md](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/score/NON_GREEDY.md) + ## Metrics All robustness tasks calculate 2 metrics: *Accuracy* and *Consistency Rate(CR)* [[4](#cr)]. diff --git a/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_aqua_rat.yaml b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_aqua_rat.yaml new file mode 100644 index 0000000000000000000000000000000000000000..126630f0cb638e30ff31956be7f51895288246e4 --- /dev/null +++ b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_aqua_rat.yaml @@ -0,0 +1,36 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +task: non_greedy_robustness_agieval_aqua_rat +dataset_path: hails/agieval-aqua-rat +dataset_name: default +output_type: generate_until +test_split: test +process_docs: !function utils_agieval.non_greedy_robustness_process_docs +doc_to_text: !function utils_agieval.agi_eval_robustness_doc_to_text +doc_to_target: answer +generation_kwargs: + max_gen_toks: 1024 + do_sample: true + temperature: 0.7 + until: [] +process_results: !function utils_agieval.non_greedy_robustness_process_results +metric_list: + - metric: non_greedy_accuracy + aggregation: !function utils_agieval.non_greedy_accuracy + higher_is_better: true +metadata: + version: 1.0 +dataset_kwargs: + trust_remote_code: true diff --git a/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_logiqa_en.yaml b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_logiqa_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad1790e86bde1365f0ccfc2b6e006e5e7c118db4 --- /dev/null +++ b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_logiqa_en.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include: non_greedy_robustness_agieval_aqua_rat.yaml +task: non_greedy_robustness_agieval_logiqa_en +dataset_path: hails/agieval-logiqa-en diff --git a/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lsat_rc.yaml b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lsat_rc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0ebf3408e6a000cb564335a51ff5bcd59e421df --- /dev/null +++ b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lsat_rc.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +include: non_greedy_robustness_agieval_aqua_rat.yaml +task: non_greedy_robustness_agieval_lsat_rc +dataset_path: hails/agieval-lsat-rc diff --git a/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lstat_ar.yaml b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lstat_ar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..666dace1e2a5e255d51e14809a76ca678a3176d7 --- /dev/null +++ b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lstat_ar.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include: non_greedy_robustness_agieval_aqua_rat.yaml +task: non_greedy_robustness_agieval_lsat_ar +dataset_path: hails/agieval-lsat-ar diff --git a/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lstat_lr.yaml b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lstat_lr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d780d7ebd28822c686c2049d0de78e10f91cecb4 --- /dev/null +++ b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_lstat_lr.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include: non_greedy_robustness_agieval_aqua_rat.yaml +task: non_greedy_robustness_agieval_lsat_lr +dataset_path: hails/agieval-lsat-lr diff --git a/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_sat_en.yaml b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_sat_en.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a7ba4ed32b22859251b74b27491011aac43a69d --- /dev/null +++ b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_sat_en.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +include: non_greedy_robustness_agieval_aqua_rat.yaml +task: non_greedy_robustness_agieval_sat_en +dataset_path: hails/agieval-sat-en diff --git a/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_sat_math.yaml b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_sat_math.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34e4beeb5ae7cf6fefd500b0abb27185b7c78f8b --- /dev/null +++ b/lm_eval/tasks/score/agi_eval/non_greedy_robustness_agieval_sat_math.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +include: non_greedy_robustness_agieval_aqua_rat.yaml +task: non_greedy_robustness_agieval_sat_math +dataset_path: hails/agieval-sat-math diff --git a/lm_eval/tasks/score/agi_eval/prompt_templates.json b/lm_eval/tasks/score/agi_eval/prompt_templates.json index 720a66355e7777236e5ae2578257a3a959a7001a..979b53e64f40c1399df548aa6252104c3e78bc7d 100644 --- a/lm_eval/tasks/score/agi_eval/prompt_templates.json +++ b/lm_eval/tasks/score/agi_eval/prompt_templates.json @@ -1,9 +1,13 @@ { "option_order_robustness":{ - "prompt": "For the multiple-choice question, which option (A-E) is correct?.\n\nQuestion: {question}{options}\n\nEnd the answer with the following:\nThe best answer is (the_answer_letter) where the (the_answer_letter) is one of 'A', 'B', 'C', 'D' or 'E'.", + "prompt": "For the multiple-choice question, which option (A-E) is correct?.\n\nQuestion:{question}{options}\nEnd the answer with the following:\nThe best answer is (the_answer_letter) where the (the_answer_letter) is one of 'A', 'B', 'C', 'D' or 'E'.", "options_format": "\n{letter}: {option}" }, + "non_greedy_robustness":{ + "prompt": "For the multiple-choice question, which option (A-E) is correct?.\n\nQuestion:{question}{options}\nEnd the answer with the following:\nThe best answer is (the_answer_letter) where the (the_answer_letter) is one of 'A', 'B', 'C', 'D' or 'E'.", + "options_format": "\n{letter}: {option}" + }, "prompt_robustness":[ { diff --git a/lm_eval/tasks/score/agi_eval/score_non_greedy_robustness_agieval.yaml b/lm_eval/tasks/score/agi_eval/score_non_greedy_robustness_agieval.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5ab43bec4e8e1fc71d4670995df383eef83564f --- /dev/null +++ b/lm_eval/tasks/score/agi_eval/score_non_greedy_robustness_agieval.yaml @@ -0,0 +1,31 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +group: score_non_greedy_robustness_agieval +task: + - non_greedy_robustness_agieval_aqua_rat + - non_greedy_robustness_agieval_logiqa_en + - non_greedy_robustness_agieval_lsat_ar + - non_greedy_robustness_agieval_lsat_lr + - non_greedy_robustness_agieval_lsat_rc + - non_greedy_robustness_agieval_sat_en + - non_greedy_robustness_agieval_sat_math + +aggregate_metric_list: + - metric: non_greedy_accuracy + aggregation: mean + weight_by_size: true + +metadata: + version: 1.0 diff --git a/lm_eval/tasks/score/agi_eval/score_robustness_agieval.yaml b/lm_eval/tasks/score/agi_eval/score_robustness_agieval.yaml index 354cb5675c814aa5ce35f20c758f4a10874eab21..fe6d8a731116521169036caf22583a169945a613 100644 --- a/lm_eval/tasks/score/agi_eval/score_robustness_agieval.yaml +++ b/lm_eval/tasks/score/agi_eval/score_robustness_agieval.yaml @@ -16,5 +16,6 @@ group: score_robustness_agieval task: - score_prompt_robustness_agieval - score_option_order_robustness_agieval + - score_non_greedy_robustness_agieval metadata: version: 1.0 diff --git a/lm_eval/tasks/score/agi_eval/utils_agieval.py b/lm_eval/tasks/score/agi_eval/utils_agieval.py index b8034259d8bd48686040351ac50b90230e7948c2..4381a2cb34c771f65b4c76240def2c29bd3a314f 100644 --- a/lm_eval/tasks/score/agi_eval/utils_agieval.py +++ b/lm_eval/tasks/score/agi_eval/utils_agieval.py @@ -29,6 +29,7 @@ TEMPLATE_FILE_PATH = os.path.join(os.path.dirname(__file__), "prompt_templates.j 
PROMPT_ROBUSTNESS_TEMPLATE_KEY = "prompt_robustness" OPTION_ORDER_ROBUSTNESS_TEMPLATE_KEY = "option_order_robustness" +NON_GREEDY_ROBUSTNESS_TEMPLATE_KEY = "non_greedy_robustness" QUESTION_KEY = "query" ANSWER_INDEX_KEY = "gold" @@ -93,6 +94,13 @@ option_order_robustness_process_docs = partial( dataset_specific_preprocess=initial_process_docs, ) +non_greedy_robustness_process_docs = partial( + utils.non_greedy_robustness_process_docs, + templates_key=NON_GREEDY_ROBUSTNESS_TEMPLATE_KEY, + template_file_path=TEMPLATE_FILE_PATH, + dataset_specific_preprocess=initial_process_docs, +) + def prompt_robustness_process_results(doc, results) -> Dict[str, float]: final_answer = utils.__postprocess_pred(results[0]) @@ -135,6 +143,17 @@ def option_order_robustness_process_results(doc, results) -> Dict[str, float]: } +def non_greedy_robustness_process_results(doc, results) -> Dict[str, float]: + final_answer = utils.__postprocess_pred(results[0]) + final_answer = utils.translate_model_answer_to_labels( + final_answer, option_format=doc["options_format"], labels=LABELS + ) + question_id = doc["question_id"] + gt = LABELS[doc["answer_index"]] + + return {"non_greedy_accuracy": (question_id, final_answer, gt, None)} + + def per_prompt_accuracy(results: List[Dict[str, Any]], p_id=0) -> float: accuracies = [] for result in results: @@ -181,3 +200,16 @@ per_option_accuracy_c = partial(per_option_accuracy, always_opt="C") per_option_accuracy_d = partial(per_option_accuracy, always_opt="D") options_consistency_rate = partial(utils.options_consistency_rate, labels=LABELS) + + +def non_greedy_accuracy(results: List[Dict[str, Any]]) -> float: + accuracies = [] + for result in results: + question_id, final_answer, gt, category = result + + accuracies.append(final_answer == gt) + + accuracy = sum(accuracies) / len(accuracies) + eval_logger.info(f"Non greedy accuracy: {accuracy}") + + return np.round(accuracy, 4) diff --git a/lm_eval/tasks/score/math/non_greedy_robustness_math_algebra.yaml 
b/lm_eval/tasks/score/math/non_greedy_robustness_math_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ca1493ff74442744a40c20af82b22f9e2c90c8b --- /dev/null +++ b/lm_eval/tasks/score/math/non_greedy_robustness_math_algebra.yaml @@ -0,0 +1,36 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +task: non_greedy_robustness_math_algebra +dataset_path: EleutherAI/hendrycks_math +dataset_name: algebra +output_type: generate_until +test_split: test +process_docs: !function utils_math.non_greedy_robustness_process_docs +doc_to_text: !function utils_math.math_robustness_doc_to_text +doc_to_target: answer +generation_kwargs: + max_gen_toks: 1024 + do_sample: true + temperature: 0.7 + until: [] +process_results: !function utils_math.non_greedy_robustness_process_results +metric_list: + - metric: non_greedy_accuracy + aggregation: !function utils_math.non_greedy_accuracy + higher_is_better: true +metadata: + version: 1.0 +dataset_kwargs: + trust_remote_code: true diff --git a/lm_eval/tasks/score/math/non_greedy_robustness_math_counting_and_prob.yaml b/lm_eval/tasks/score/math/non_greedy_robustness_math_counting_and_prob.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f74c68d721adfb24bcc078b1e866372155038e3 --- /dev/null +++ b/lm_eval/tasks/score/math/non_greedy_robustness_math_counting_and_prob.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include: non_greedy_robustness_math_algebra.yaml +dataset_name: counting_and_probability +task: non_greedy_robustness_math_counting_and_prob diff --git a/lm_eval/tasks/score/math/non_greedy_robustness_math_geometry.yaml b/lm_eval/tasks/score/math/non_greedy_robustness_math_geometry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6adb0cdc0d67b76294ba4c5cb3acd12abdfa4f80 --- /dev/null +++ b/lm_eval/tasks/score/math/non_greedy_robustness_math_geometry.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +include: non_greedy_robustness_math_algebra.yaml +dataset_name: geometry +task: non_greedy_robustness_math_geometry diff --git a/lm_eval/tasks/score/math/non_greedy_robustness_math_intermediate_algebra.yaml b/lm_eval/tasks/score/math/non_greedy_robustness_math_intermediate_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3efe9cc0ad0b1575c4e969f152d58951a07d3770 --- /dev/null +++ b/lm_eval/tasks/score/math/non_greedy_robustness_math_intermediate_algebra.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include: non_greedy_robustness_math_algebra.yaml +dataset_name: intermediate_algebra +task: non_greedy_robustness_math_intermediate_algebra diff --git a/lm_eval/tasks/score/math/non_greedy_robustness_math_num_theory.yaml b/lm_eval/tasks/score/math/non_greedy_robustness_math_num_theory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a089c16ea34bc29a0cdb1a3ad7bab506c84a0ae --- /dev/null +++ b/lm_eval/tasks/score/math/non_greedy_robustness_math_num_theory.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include: non_greedy_robustness_math_algebra.yaml +dataset_name: number_theory +task: non_greedy_robustness_math_num_theory diff --git a/lm_eval/tasks/score/math/non_greedy_robustness_math_prealgebra.yaml b/lm_eval/tasks/score/math/non_greedy_robustness_math_prealgebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b292bc7e63caf716a7d025d64987eadcc14855df --- /dev/null +++ b/lm_eval/tasks/score/math/non_greedy_robustness_math_prealgebra.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +include: non_greedy_robustness_math_algebra.yaml +dataset_name: prealgebra +task: non_greedy_robustness_math_prealgebra diff --git a/lm_eval/tasks/score/math/non_greedy_robustness_math_precalc.yaml b/lm_eval/tasks/score/math/non_greedy_robustness_math_precalc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de773fd9af01443e02fd5a98f75c1b12bcd06e6c --- /dev/null +++ b/lm_eval/tasks/score/math/non_greedy_robustness_math_precalc.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include: non_greedy_robustness_math_algebra.yaml +dataset_name: precalculus +task: non_greedy_robustness_math_precalc diff --git a/lm_eval/tasks/score/math/prompt_templates.json b/lm_eval/tasks/score/math/prompt_templates.json index 072f574034f7b51a8368558cd0a1f5016f4f51b8..e4cf071b009b01d27119d2a9a91f2405cf767e19 100644 --- a/lm_eval/tasks/score/math/prompt_templates.json +++ b/lm_eval/tasks/score/math/prompt_templates.json @@ -1,4 +1,8 @@ { + "non_greedy_robustness": { + "prompt": "Calculate the answer to this math problem\nProblem: {question}\nConclude your answer with:\nThe final answer is: $\\boxed{{answer}}$\nwhere [answer] is just the final number or expression that solves the problem." + }, + "prompt_robustness": [ { "prompt": "Efficiently solve the following math challenge. 
Explain your approach step-by-step\nThe answer should end with: The final answer is: $\\boxed{{answer}}$\nwhere [answer] is just the final number or expression that solves the problem\nProblem: {question}\nLets think step by step" diff --git a/lm_eval/tasks/score/math/score_non_greedy_robustness_math.yaml b/lm_eval/tasks/score/math/score_non_greedy_robustness_math.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3bf72d39555b46e3b7d3848cdadf6fbc4195339 --- /dev/null +++ b/lm_eval/tasks/score/math/score_non_greedy_robustness_math.yaml @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +group: score_non_greedy_robustness_math +task: + - non_greedy_robustness_math_algebra + - non_greedy_robustness_math_counting_and_prob + - non_greedy_robustness_math_geometry + - non_greedy_robustness_math_intermediate_algebra + - non_greedy_robustness_math_num_theory + - non_greedy_robustness_math_prealgebra + - non_greedy_robustness_math_precalc + +aggregate_metric_list: + - metric: non_greedy_accuracy + aggregation: mean + weight_by_size: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/score/math/score_robustness_math.yaml b/lm_eval/tasks/score/math/score_robustness_math.yaml index f3b733667b5ad948b643bb2062fb70c30016c55e..472a5b4906ded2de1ee8fee0ed9e1b5f1341e01b 100644 --- a/lm_eval/tasks/score/math/score_robustness_math.yaml +++ b/lm_eval/tasks/score/math/score_robustness_math.yaml @@ -15,5 +15,6 @@ group: score_robustness_math task: - score_prompt_robustness_math + - score_non_greedy_robustness_math metadata: version: 1.0 diff --git a/lm_eval/tasks/score/math/utils_math.py b/lm_eval/tasks/score/math/utils_math.py index cf41473ae89a58a956dbcf97748359cd5e016f4c..4068b179882718afe79dd61a967b4c531ee0e58b 100644 --- a/lm_eval/tasks/score/math/utils_math.py +++ b/lm_eval/tasks/score/math/utils_math.py @@ -34,6 +34,7 @@ from lm_eval.utils import eval_logger TEMPLATE_FILE_PATH = os.path.join(os.path.dirname(__file__), "prompt_templates.json") PROMPT_ROBUSTNESS_TEMPLATE_KEY = "prompt_robustness" +NON_GREEDY_ROBUSTNESS_TEMPLATE_KEY = "non_greedy_robustness" math_robustness_doc_to_text = robustness_doc_to_text @@ -141,8 +142,17 @@ def prompt_robustness_process_docs(doc: datasets.Dataset) -> datasets.Dataset: doc = process_docs(doc) return utils.process_docs_add_prompts( doc, - PROMPT_ROBUSTNESS_TEMPLATE_KEY, - TEMPLATE_FILE_PATH, + templates_key=PROMPT_ROBUSTNESS_TEMPLATE_KEY, + template_file_path=TEMPLATE_FILE_PATH, + ) + + +def non_greedy_robustness_process_docs(doc: datasets.Dataset) -> datasets.Dataset: + doc = process_docs(doc) + return 
utils.non_greedy_robustness_process_docs( + doc, + templates_key=NON_GREEDY_ROBUSTNESS_TEMPLATE_KEY, + template_file_path=TEMPLATE_FILE_PATH, ) @@ -163,6 +173,13 @@ def process_results(doc: dict, results: List[str]) -> Dict[str, int]: return results +def non_greedy_robustness_process_results( + doc: dict, results: List[str] +) -> Dict[str, int]: + answer = extract_answer(results[0]) + return {"non_greedy_accuracy": (doc["question_id"], answer, doc["answer"], None)} + + def per_prompt_accuracy(results: List[Dict[str, Any]], p_id=0) -> float: accuracies = [] for result in results: @@ -233,3 +250,19 @@ def math_prompt_consistency_rate(results: List[Dict[str, Any]]) -> float: question_answers_list = [answers for answers in question_answers_dict.values()] return calculate_consistency_rate(question_answers_list) + + +def non_greedy_accuracy(results: List[Dict[str, Any]]) -> float: + accuracies = [] + for result in results: + question_id, final_answer, gt, _ = result + if math_equal(final_answer, gt): + retval = 1 + else: + retval = 0 + accuracies.append(retval) + + accuracy = sum(accuracies) / len(accuracies) + eval_logger.info(f"Non greedy accuracy: {accuracy}") + + return np.round(accuracy, 4) diff --git a/lm_eval/tasks/score/mmlu_pro/prompt_templates.json b/lm_eval/tasks/score/mmlu_pro/prompt_templates.json index 57278cd17e0eddc675c08dec4e888ef982eb4d5e..008598ba857c2deca97d7d7f2a02c72deb14adb0 100644 --- a/lm_eval/tasks/score/mmlu_pro/prompt_templates.json +++ b/lm_eval/tasks/score/mmlu_pro/prompt_templates.json @@ -1,6 +1,11 @@ { "option_order_robustness":{ - "prompt": "For the multiple-choice question related to {category}, which option (A-J) is correct?.\n\nQuestion: {question}{options}\n\nEnd the answer with the following:\nThe best answer is (the_answer_letter) where the (the_answer_letter) is one of 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I' or 'J'.", + "prompt": "For the multiple-choice question related to {category}, which option (A-J) is 
correct?.\n\nQuestion:{question}{options}\nEnd the answer with the following:\nThe best answer is (the_answer_letter) where the (the_answer_letter) is one of 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I' or 'J'.", + "options_format": "\n{letter}: {option}" + }, + + "non_greedy_robustness":{ + "prompt": "For the multiple-choice question related to {category}, which option (A-J) is correct?.\n\nQuestion:{question}{options}\nEnd the answer with the following:\nThe best answer is (the_answer_letter) where the (the_answer_letter) is one of 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I' or 'J'.", "options_format": "\n{letter}: {option}" }, diff --git a/lm_eval/tasks/score/mmlu_pro/score_non_greedy_robustness_mmlu_pro.yaml b/lm_eval/tasks/score/mmlu_pro/score_non_greedy_robustness_mmlu_pro.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ee8ee5f9a926d6da1e7e1aa5839a23c385d6a21 --- /dev/null +++ b/lm_eval/tasks/score/mmlu_pro/score_non_greedy_robustness_mmlu_pro.yaml @@ -0,0 +1,38 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +tag: score_robustness_mmlu_pro +task: score_non_greedy_robustness_mmlu_pro +dataset_path: TIGER-Lab/MMLU-Pro +dataset_name: default +output_type: generate_until +validation_split: validation +test_split: test +process_docs: !function utils_mmlu_pro.non_greedy_robustness_process_docs +doc_to_text: !function utils_mmlu_pro.mmlu_pro_robustness_doc_to_text +doc_to_target: answer +generation_kwargs: + until: [] + max_gen_toks: 1024 + do_sample: true + temperature: 0.7 +process_results: !function utils_mmlu_pro.non_greedy_robustness_process_results +metric_list: + - metric: non_greedy_macro_accuracy + aggregation: !function utils_mmlu_pro.non_greedy_macro_accuracy + higher_is_better: true +metadata: + version: 1.0 +dataset_kwargs: + trust_remote_code: true diff --git a/lm_eval/tasks/score/mmlu_pro/utils_mmlu_pro.py b/lm_eval/tasks/score/mmlu_pro/utils_mmlu_pro.py index 4dd4b6570319d758d420770ed59f905b6312df7a..da46e10170f88820c290ecbb2154058dd07ab8a7 100644 --- a/lm_eval/tasks/score/mmlu_pro/utils_mmlu_pro.py +++ b/lm_eval/tasks/score/mmlu_pro/utils_mmlu_pro.py @@ -27,6 +27,7 @@ TEMPLATE_FILE_PATH = os.path.join(os.path.dirname(__file__), "prompt_templates.j PROMPT_ROBUSTNESS_TEMPLATE_KEY = "prompt_robustness" OPTION_ORDER_ROBUSTNESS_TEMPLATE_KEY = "option_order_robustness" +NON_GREEDY_ROBUSTNESS_TEMPLATE_KEY = "non_greedy_robustness" QUESTION_KEY = "question" @@ -48,6 +49,23 @@ option_order_robustness_process_docs = partial( templates_key=OPTION_ORDER_ROBUSTNESS_TEMPLATE_KEY, labels=LABELS, ) +non_greedy_robustness_process_docs = partial( + utils.non_greedy_robustness_process_docs, + template_file_path=TEMPLATE_FILE_PATH, + templates_key=NON_GREEDY_ROBUSTNESS_TEMPLATE_KEY, +) + + +def non_greedy_robustness_process_results(doc, results) -> Dict[str, float]: + final_answer = utils.__postprocess_pred(results[0]) + final_answer = utils.translate_model_answer_to_labels( + final_answer, option_format=doc["options_format"], labels=LABELS + ) + question_id = 
doc["question_id"] + category = doc["category"] + gt = LABELS[doc["answer_index"]] + + return {"non_greedy_macro_accuracy": (question_id, final_answer, gt, category)} def prompt_robustness_process_results(doc, results) -> Dict[str, float]: @@ -162,3 +180,18 @@ per_option_macro_accuracy_i = partial(per_option_macro_accuracy, always_opt="I") per_option_macro_accuracy_j = partial(per_option_macro_accuracy, always_opt="J") options_consistency_rate = partial(utils.options_consistency_rate, labels=LABELS) + + +def non_greedy_macro_accuracy(results: List[Dict[str, Any]]) -> float: + accuracies = {} + for result in results: + question_id, final_answer, gt, category = result + if category not in accuracies: + accuracies[category] = [] + accuracies[category].append(final_answer == gt) + + for key in accuracies: + accuracies[key] = sum(accuracies[key]) / len(accuracies[key]) + eval_logger.info(f"Non greedy, category - {key} accuracy: {accuracies[key]}") + + return np.round(np.mean([v for v in accuracies.values()]), 4) diff --git a/lm_eval/tasks/score/non_greedy.sh b/lm_eval/tasks/score/non_greedy.sh new file mode 100755 index 0000000000000000000000000000000000000000..3dd6fe8f081ffa8af42581989b7ec1622c4e44df --- /dev/null +++ b/lm_eval/tasks/score/non_greedy.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +helpFunction() +{ + echo "" + echo "Usage: $0 -m MODEL -t TASK -s SEED -o OUTPUT_DIR" + echo -e "\t-m huggingface model name" + echo -e "\t-t task name one of score_non_greedy_robustness_[agieval|mmlu_pro|math]" + echo -e "\t-s random seed for evaluation [1-5]" + echo -e "\t-o output directory" + exit 1 # Exit script after printing help +} + +while getopts "m:t:s:" opt +do + case "$opt" in + m ) MODEL="$OPTARG" ;; + t ) TASK="$OPTARG" ;; + s ) SEED="$OPTARG" ;; + o ) OUTPUT_DIR="$OPTARG" ;; + ? 
) helpFunction ;; # Print helpFunction in case parameter is non-existent + esac +done + +if [ -z "$MODEL" ] | [ -z "$TASK" ] | [ -z "$SEED" ] | [ -z "$OUTPUT_DIR" ] +then + echo "Some or all of the parameters are empty"; + helpFunction +fi + +echo "evaluating $MODEL on task $TASK with seed $SEED" +echo "output will be saved in $OUTPUT_DIR" + +TENSOR_PARALLEL=8 +BATCH_SIZE="auto" + +echo "running evaluation on vllm with tensor parallelism $TENSOR_PARALLEL" + +lm_eval --model vllm \\ + --model_args pretrained=$MODEL,dtype=bfloat16,tensor_parallel_size=$TENSOR_PARALLEL,gpu_memory_utilization=0.9,\\ + max_model_len=4096,data_parallel_size=1,disable_custom_all_reduce=True,enforce_eager=False,seed=$SEED\\ + --apply_chat_template \\ + --tasks $TASKS \\ + --batch_size $BATCH_SIZE \\ + --log_samples \\ + --output_path $OUTPUT_DIR \\ diff --git a/lm_eval/tasks/score/non_greedy_summarizer.py b/lm_eval/tasks/score/non_greedy_summarizer.py new file mode 100644 index 0000000000000000000000000000000000000000..a7b78a9e8df274498b916df66b8bcf49e9a77f1a --- /dev/null +++ b/lm_eval/tasks/score/non_greedy_summarizer.py @@ -0,0 +1,305 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import glob +import json +import os +from datetime import datetime +from itertools import combinations +from pathlib import Path +from typing import List + +import pandas as pd + +from lm_eval.tasks.score.math.math_grader import math_equal +from lm_eval.utils import handle_non_serializable, make_table + + +N_SEEDS = 5 + + +def load_json_logs(file_paths, subtasks): + """ + Loads JSON logs of jsonl format from file paths into a single DataFrame. + + Args: + file_paths: List of file paths to the JSON logs. + + Returns: + A DataFrame containing the logs. + """ + per_seed_df = { + "question_id": [], + "final_answer_seed_": [], + "gt": [], + "category": [], + } + _search_key = None + for i in range(len(file_paths)): + file_path = file_paths[i] + with open(file_path, "r") as f: + for line in f: + datapoint = json.loads(line) + if _search_key is None: + if "non_greedy_macro_accuracy" in datapoint: + _search_key = "non_greedy_macro_accuracy" + elif "non_greedy_accuracy" in datapoint: + _search_key = "non_greedy_accuracy" + question_id, final_answer, gt, category = datapoint[_search_key] + if subtasks is not None: + category = subtasks[i] + per_seed_df["question_id"].append(question_id) + per_seed_df["final_answer_seed_"].append(final_answer) + per_seed_df["gt"].append(gt) + per_seed_df["category"].append(category) + df = pd.DataFrame(per_seed_df) + return df + + +def calculate_consistency_rate(responses: List[List[str]]) -> float: + """ + Calculate the Consistency Rate (CR) for a given set of responses. + + Args: + responses: List of lists, where each inner list contains responses to the same question. + + Returns: + The consistency rate as a float. 
+ """ + total_similarity = 0 + total_combinations = 0 + + for response_set in responses: + pairs = combinations(response_set, 2) + num_pairs = len(response_set) * (len(response_set) - 1) / 2 + total_combinations += num_pairs + for answer1, answer2 in pairs: + total_similarity += int(answer1 == answer2) + + return total_similarity / total_combinations if total_combinations > 0 else 0.0 + + +def calculate_math_consistency_rate(responses: List[List[str]]) -> float: + """ + Calculate the Consistency Rate (CR) for a given set of responses. + + Args: + responses: List of lists, where each inner list contains responses to the same question. + + Returns: + The consistency rate as a float. + """ + total_similarity = 0 + total_combinations = 0 + + for response_set in responses: + pairs = combinations(response_set, 2) + num_pairs = len(response_set) * (len(response_set) - 1) / 2 + total_combinations += num_pairs + for answer1, answer2 in pairs: + total_similarity += int(math_equal(answer1, answer2)) + + return total_similarity / total_combinations if total_combinations > 0 else 0.0 + + +def main(): + parser = argparse.ArgumentParser( + description="Calculate consistency rate from JSON logs." + ) + parser.add_argument( + "--log_dir", help="Path to the directory containing the JSON log files." + ) + parser.add_argument("--dataset", help="Dataset name: agieval, mmlu_pro or math") + args = parser.parse_args() + + for seed in range(1, N_SEEDS + 1): + # Checking if directories exist + seed_log_dir = os.path.join(args.log_dir, f"seed_{seed}") + assert os.path.exists(seed_log_dir), ( + f"No logs found for seed={seed}. 
No directory found at {seed_log_dir}" + ) + subtasks = None + if args.dataset == "agieval": + agieval_subtasks = [ + "aqua_rat", + "logiqa_en", + "lsat_ar", + "lsat_lr", + "lsat_rc", + "sat_en", + "sat_math", + ] + subtasks = agieval_subtasks + file_paths = [] + for subtask in agieval_subtasks: + log_path = os.path.join( + seed_log_dir, + f"*/samples_non_greedy_robustness_agieval_{subtask}_*.jsonl", + ) + subtask_logs = glob.glob(log_path) + if len(subtask_logs) == 0: + raise FileNotFoundError( + f"No logs found for agieval subtask {subtask} for seed={seed} in the path {log_path}." + ) + elif len(subtask_logs) > 1: + raise FileExistsError( + f"Multiple logs found for agieval subtask {subtask} for seed={seed}." + ) + file_paths.append(subtask_logs[0]) + + elif args.dataset == "mmlu_pro": + task_logs = glob.glob( + os.path.join( + seed_log_dir, + "*/samples_score_non_greedy_robustness_mmlu_pro_*.jsonl", + ) + ) + file_paths = [] + if len(task_logs) == 0: + raise FileNotFoundError( + f"No logs found for mmlu_pro for seed={seed}. PATH: {seed_log_dir}" + ) + elif len(task_logs) > 1: + raise FileExistsError( + f"Multiple logs found for mmlu_pro for seed={seed}." + ) + file_paths.append(task_logs[0]) + + elif args.dataset == "math": + math_subtasks = [ + "algebra", + "counting_and_prob", + "geometry", + "intermediate_algebra", + "num_theory", + "prealgebra", + "precalc", + ] + subtasks = math_subtasks + file_paths = [] + + for subtask in math_subtasks: + log_path = os.path.join( + seed_log_dir, + f"*/samples_non_greedy_robustness_math_{subtask}_*.jsonl", + ) + + subtask_logs = glob.glob(log_path) + if len(subtask_logs) == 0: + raise FileNotFoundError( + f"No logs found for math subtask {subtask} for seed={seed} in the path {log_path}." + ) + elif len(subtask_logs) > 1: + raise FileExistsError( + f"Multiple logs found for math subtask {subtask} for seed={seed}." + ) + file_paths.append(subtask_logs[0]) + + else: + raise ValueError( + "Invalid dataset name. 
only agieval, mmlu_pro and math are supported." + ) + + df = load_json_logs(file_paths, subtasks) + + # merge all dfs by question_id, category and gt + if seed == 1: + df_all = df + df_all[f"final_answer_seed_{seed}"] = df["final_answer_seed_"] + else: + df_all = df_all.merge( + df, on=["question_id", "category"], suffixes=("", seed) + ) + + responses = df_all[ + [f"final_answer_seed_{seed}" for seed in range(1, N_SEEDS + 1)] + ].values.tolist() + + # calculate per seed accuracy + + if args.dataset == "math": + consistency_rate = calculate_math_consistency_rate(responses) + results = {"alias": f"score_non_greedy_robustness_{args.dataset}"} + + results.update( + { + "consistency_rate,none": consistency_rate, + "consistency_rate_stderr,none": "N/A", + } + ) + + for seed in range(1, N_SEEDS + 1): + df_all[f"accuracy_seed_{seed}"] = df_all[ + [f"final_answer_seed_{seed}", "gt"] + ].apply(lambda x: math_equal(*x), axis=1) + accuracy = df_all[f"accuracy_seed_{seed}"].mean() + results[f"seed_{seed}_accuracy,none"] = accuracy + results[f"seed_{seed}_accuracy_stderr,none"] = "N/A" + + else: + consistency_rate = calculate_consistency_rate(responses) + results = {"alias": f"score_non_greedy_robustness_{args.dataset}"} + + results.update( + { + "consistency_rate,none": consistency_rate, + "consistency_rate_stderr,none": "N/A", + } + ) + + for seed in range(1, N_SEEDS + 1): + df_all[f"accuracy_seed_{seed}"] = ( + df_all[f"final_answer_seed_{seed}"] == df_all["gt"] + ) + accuracy = df_all[f"accuracy_seed_{seed}"].mean() + results[f"seed_{seed}_accuracy,none"] = accuracy + results[f"seed_{seed}_accuracy_stderr,none"] = "N/A" + + metrics = [f"seed_{seed}_accuracy" for seed in range(1, N_SEEDS + 1)] + [ + "consistency_rate" + ] + higher_is_better = {metric: True for metric in metrics} + + results_dict = { + "results": {f"score_non_greedy_robustness_{args.dataset}": results}, + "group_subtasks": {f"score_non_greedy_robustness_{args.dataset}": []}, + "configs": None, + "versions": 
{f"score_non_greedy_robustness_{args.dataset}": 1}, + "n-shot": {f"score_non_greedy_robustness_{args.dataset}": 0}, + "higher_is_better": { + f"score_non_greedy_robustness_{args.dataset}": higher_is_better + }, + "n-samples": None, + } + + dumped = json.dumps( + results_dict, + indent=2, + default=handle_non_serializable, + ensure_ascii=False, + ) + + path = Path(args.log_dir) + path.mkdir(parents=True, exist_ok=True) + + date_id = datetime.now().isoformat().replace(":", "-") + file_results_aggregated = path.joinpath(f"{args.dataset}_results_{date_id}.json") + file_results_aggregated.open("w", encoding="utf-8").write(dumped) + + print(make_table(results_dict)) + + +if __name__ == "__main__": + main() diff --git a/lm_eval/tasks/score/utils.py b/lm_eval/tasks/score/utils.py index 5a7174f1ee9f78044d6d22b26b1587a5c8077d46..61d7e3b035c8a149369ccf5617a0d86e4a75c6d5 100644 --- a/lm_eval/tasks/score/utils.py +++ b/lm_eval/tasks/score/utils.py @@ -130,6 +130,36 @@ def option_order_robustness_process_docs( return doc.map(repeat_doc_swap_correct_answer, batched=True) +def non_greedy_robustness_process_docs( + doc: Dataset, + templates_key: str, + template_file_path: str, + dataset_specific_preprocess: callable = None, +) -> Dataset: + try: + with open(template_file_path) as f: + prompt_template = json.load(f)[templates_key] + prompt = prompt_template["prompt"] + options_format = prompt_template.get("options_format", None) + except FileNotFoundError: + eval_logger.error("Prompt templates not found") + sys.exit() + + if dataset_specific_preprocess is not None: + doc = dataset_specific_preprocess(doc) + + def add_prompt_col(batched_docs): + initial_len = len(next(iter(batched_docs.values()))) + new_batched_docs = copy.deepcopy(batched_docs) + new_batched_docs["prompt"] = [prompt] * initial_len + if options_format is not None: + new_batched_docs["options_format"] = [options_format] * initial_len + + return new_batched_docs + + return doc.map(add_prompt_col, batched=True) + + def 
robustness_doc_to_text(doc: Dataset) -> str: upper_case = string.ascii_uppercase lower_case = string.ascii_lowercase diff --git a/lm_eval/tasks/scrolls/task.py b/lm_eval/tasks/scrolls/task.py index ac2fed25ae9a0ca7046680e09dc8191bd607c8f5..87372d8ae1f703585e0094595a406bdf5b9824e8 100644 --- a/lm_eval/tasks/scrolls/task.py +++ b/lm_eval/tasks/scrolls/task.py @@ -4,7 +4,8 @@ from functools import reduce import numpy as np import transformers.data.metrics.squad_metrics as squad_metrics -from datasets import Dataset, load_metric +from datasets import Dataset +from evaluate import load from transformers import AutoTokenizer from lm_eval.api.instance import Instance @@ -48,7 +49,10 @@ def _download_metric(): from huggingface_hub import hf_hub_download scrolls_metric_path = hf_hub_download( - repo_id="tau/scrolls", repo_type="dataset", filename="metrics/scrolls.py" + repo_id="tau/scrolls", + repo_type="dataset", + filename="metrics/scrolls.py", + revision="refs/pr/5", ) updated_scrolls_metric_path = ( os.path.dirname(scrolls_metric_path) @@ -119,7 +123,7 @@ class _SCROLLSTask(ConfigurableTask): def __init__(self, config=None): super().__init__(config={"metadata": {"version": self.VERSION}}) if self.DATASET_NAME is not None: - self.metric = load_metric(_download_metric(), config_name=self.DATASET_NAME) + self.metric = load(_download_metric(), config_name=self.DATASET_NAME) def has_training_docs(self): return True @@ -253,11 +257,14 @@ class _SCROLLSMultipleChoiceTask(_SCROLLSTask): } def construct_requests(self, doc, ctx, **kwargs): + apply_chat_template = kwargs.pop("apply_chat_template", False) request_list = [ Instance( request_type="loglikelihood", doc=doc, - arguments=(ctx, " {}".format(choice)), + arguments=(ctx, " {}".format(choice)) + if not apply_chat_template + else (ctx, "{}".format(choice)), idx=i, **kwargs, ) @@ -285,6 +292,7 @@ class _SCROLLSSummaryTask(_SCROLLSTask): } def construct_requests(self, doc, ctx, **kwargs): + kwargs.pop("apply_chat_template", 
False) return Instance( request_type="generate_until", doc=doc, @@ -327,19 +335,22 @@ class Qasper(_SCROLLSTask): return {"f1": (prediction, doc["outputs"])} def construct_requests(self, doc, ctx, **kwargs): + apply_chat_template = kwargs.pop("apply_chat_template", False) if doc["is_yes_no"]: return [ Instance( request_type="loglikelihood", doc=doc, - arguments=(ctx, " yes"), + arguments=(ctx, " yes") + if not apply_chat_template + else (ctx, "yes"), idx=0, **kwargs, ), Instance( request_type="loglikelihood", doc=doc, - arguments=(ctx, " no"), + arguments=(ctx, " no") if not apply_chat_template else (ctx, "no"), idx=1, **kwargs, ), @@ -406,6 +417,7 @@ class NarrativeQA(_SCROLLSTask): return {"f1": (results[0], doc["outputs"])} def construct_requests(self, doc, ctx, **kwargs): + kwargs.pop("apply_chat_template", False) return Instance( request_type="generate_until", doc=doc, diff --git a/lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py b/lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py index bf4d49d2c14a78b20037453b312f9cbc29a7558d..709a36759ca82f91db6d1d75de224dc3e6dd9726 100644 --- a/lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py +++ b/lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py @@ -258,7 +258,7 @@ def doc_to_text(src: str, tgt: str) -> str: src_name, tgt_name = map(code_to_language_name, [src, tgt]) return f"""\ -{src_name} sentence: {jinja_var('sentence_' + src)} +{src_name} sentence: {jinja_var("sentence_" + src)} {tgt_name} sentence:""" diff --git a/lm_eval/tasks/spanish_bench/phrases_es/_phrases_es_common.yaml b/lm_eval/tasks/spanish_bench/phrases_es/_phrases_es_common similarity index 100% rename from lm_eval/tasks/spanish_bench/phrases_es/_phrases_es_common.yaml rename to lm_eval/tasks/spanish_bench/phrases_es/_phrases_es_common diff --git a/lm_eval/tasks/spanish_bench/phrases_es/phrases_es-va.yaml b/lm_eval/tasks/spanish_bench/phrases_es/phrases_es-va.yaml index 
546f914dd6c685f71b66104a86af3d4009446c9e..bb419e1d04c32f79b57113b4dfaecd7f1100cb6a 100644 --- a/lm_eval/tasks/spanish_bench/phrases_es/phrases_es-va.yaml +++ b/lm_eval/tasks/spanish_bench/phrases_es/phrases_es-va.yaml @@ -1,5 +1,5 @@ # File generated by `create-yamls.py` -include: _phrases_es_common.yaml +include: _phrases_es_common task: phrases_es-va doc_to_text: 'Oració en espanyol: {{es}} diff --git a/lm_eval/tasks/spanish_bench/phrases_es/phrases_va-es.yaml b/lm_eval/tasks/spanish_bench/phrases_es/phrases_va-es.yaml index b0028666d0dbfb3050a8537a56203e3f1e5455f7..c85c26ef5adc88ce9159797276db6f36422c0b5c 100644 --- a/lm_eval/tasks/spanish_bench/phrases_es/phrases_va-es.yaml +++ b/lm_eval/tasks/spanish_bench/phrases_es/phrases_va-es.yaml @@ -1,5 +1,5 @@ # File generated by `create-yamls.py` -include: _phrases_es_common.yaml +include: _phrases_es_common task: phrases_va-es doc_to_text: 'Oració en valencià: {{va}} diff --git a/lm_eval/tasks/squadv2/task.py b/lm_eval/tasks/squadv2/task.py index 184a5978e74d25327f0cca4677fecb93b2510309..5a77cb5fdb98c1035779107e1802c4df8f3b5f4c 100644 --- a/lm_eval/tasks/squadv2/task.py +++ b/lm_eval/tasks/squadv2/task.py @@ -58,9 +58,9 @@ class SQuAD2(ConfigurableTask): super().__init__(config={"metadata": {"version": self.VERSION}}) # HF changed squad on us so we have to make sure we aren't running the old one - assert version.parse(datasets.__version__) >= version.parse( - "1.11.0" - ), "datasets v1.11.0 or later required for SQuAD" + assert version.parse(datasets.__version__) >= version.parse("1.11.0"), ( + "datasets v1.11.0 or later required for SQuAD" + ) def has_training_docs(self): return True diff --git a/lm_eval/tasks/tmlu/default/_generate_configs.py b/lm_eval/tasks/tmlu/default/_generate_configs.py index 86b176085dc76366db3f6745d21e99a3a40b1b0c..79e2175d6be10adb4b718868fdf13d74c31c5d48 100644 --- a/lm_eval/tasks/tmlu/default/_generate_configs.py +++ b/lm_eval/tasks/tmlu/default/_generate_configs.py @@ -14,7 +14,8 @@ 
categories = { "STEM": [ "biology", "chemistry", - "mathematics" "physics", + "mathematics", + "physics", "earth science", ], "humanities": ["Chinese", "history", "Tour", "law"], diff --git a/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml b/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml index 965084c8d5d3b1904b724d80665c1f19084c73c2..a810322e203d685d74dbd697942f9d8f3b95fa75 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml @@ -9,7 +9,7 @@ D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_driving_rule" "task_alias": "driving rule" diff --git a/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml b/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml index 6a1fc7b26ace1e95c2e1df92c26cc41c12d4632e..3fa66f65217c4e18e10046f486fff5ff1aff4d86 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml @@ -9,7 +9,7 @@ D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. 
{{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_taiwan_tourist_resources" "task_alias": "taiwan tourist resources" diff --git a/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml b/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml index 987c2d7d92199355c1158111391a24d983353881..55e65c87aeb95e9ad771663c85ffe05208e2efd3 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml @@ -9,7 +9,7 @@ D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_teacher_qualification" "task_alias": "teacher qualification" diff --git a/lm_eval/utils.py b/lm_eval/utils.py index 7166e24d0723e397f00347d6f14eb14e5902a452..18c7057fc4ea50b530d669b4dc5c1d5e10dc7e2a 100644 --- a/lm_eval/utils.py +++ b/lm_eval/utils.py @@ -10,7 +10,7 @@ import os import re from dataclasses import asdict, is_dataclass from itertools import islice -from typing import Any, Callable, List +from typing import Any, Callable, Generator, List, Tuple import numpy as np import yaml @@ -48,9 +48,9 @@ def escaped_split(text, sep_char, maxsplit=-1): is not specified or less than 0, then there is no limit on the number of splits (all possible splits are made). 
""" - assert ( - len(sep_char) == 1 - ), "separation string must be a single character for escaped splitting" + assert len(sep_char) == 1, ( + "separation string must be a single character for escaped splitting" + ) if maxsplit == 0: return text @@ -104,7 +104,8 @@ def simple_parse_args_string(args_string): return {} arg_list = [arg for arg in args_string.split(",") if arg] args_dict = { - k: handle_arg_string(v) for k, v in [arg.split("=") for arg in arg_list] + kv[0]: handle_arg_string("=".join(kv[1:])) + for kv in [arg.split("=") for arg in arg_list] } return args_dict @@ -201,7 +202,9 @@ def get_sample_results_filenames(filenames: List[str]) -> List[str]: return [f for f in filenames if "/samples_" in f and ".json" in f] -def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len): +def get_rolling_token_windows( + token_list: List[int], prefix_token: int, max_seq_len: int, context_len: int +) -> Generator[Tuple[List[int], List[int]], None, None]: """ - context_len allows for a rolling window context, allowing each prediction window to potentially condition on some context @@ -228,7 +231,7 @@ def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len # Special handling for first window: predict all tokens first_seq_len = min(max_seq_len, len(token_list)) - yield ([prefix_token] + token_list[: first_seq_len - 1], token_list[:first_seq_len]) + yield [prefix_token] + token_list[: first_seq_len - 1], token_list[:first_seq_len] predicted += first_seq_len while predicted < len(token_list): @@ -242,7 +245,9 @@ def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len predicted += window_pred_len -def make_disjoint_window(pair): +def make_disjoint_window( + pair: Tuple[List[int], List[int]], +) -> Tuple[List[int], List[int]]: """Takes output from get_rolling_token_windows and makes the context not overlap with the continuation""" a, b = pair return a[: len(a) - (len(b) - 1)], b diff --git 
a/pyproject.toml b/pyproject.toml index 19a9ca78a7cd1018d8de772a05d1de9ec596bc22..f2aac3554e01d8e9f53ba0480203de665ebd2ab8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "lm_eval" -version = "0.4.5" +version = "0.4.7" authors = [ {name="EleutherAI", email="contact@eleuther.ai"} ] @@ -16,7 +16,7 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] -requires-python = ">=3.8" +requires-python = ">=3.9" license = { "text" = "MIT" } dependencies = [ "accelerate>=0.26.0", diff --git a/scripts/model_comparator.py b/scripts/model_comparator.py index 55f4f3b15468b2f46e590cbfd82d7902f1d9a16f..ae211824dfa7c35b6c3849ebb8dae05088a2104a 100644 --- a/scripts/model_comparator.py +++ b/scripts/model_comparator.py @@ -17,7 +17,7 @@ eval_logger = utils.eval_logger def memory_stats(): eval_logger.info( - f"Memory allocated: {torch.cuda.memory_allocated() / 1024 ** 2}, reserved: {torch.cuda.memory_reserved() // 1024 ** 2}" + f"Memory allocated: {torch.cuda.memory_allocated() / 1024**2}, reserved: {torch.cuda.memory_reserved() // 1024**2}" ) diff --git a/scripts/zeno_visualize.py b/scripts/zeno_visualize.py index 4bc7e03bf8417646ee5ffad9bd7f25e332dac597..1668471c611ab5b9fb04148c9071549776a0eb8a 100644 --- a/scripts/zeno_visualize.py +++ b/scripts/zeno_visualize.py @@ -66,9 +66,9 @@ def main(): f"All models must have the same tasks. {model} has tasks: {model_tasks} but have already recorded tasks: {old_tasks}. Taking intersection {tasks}" ) - assert ( - len(tasks) > 0 - ), "Must provide at least one task in common amongst models to compare." + assert len(tasks) > 0, ( + "Must provide at least one task in common amongst models to compare." 
+ ) for task in tasks: # Upload data for all models @@ -109,13 +109,14 @@ def main(): if model_index == 0: # Only need to assemble data for the first model metrics = [] for metric in config["metric_list"]: - metrics.append( - ZenoMetric( - name=metric["metric"], - type="mean", - columns=[metric["metric"]], + if metric.get("aggregation") == "mean": + metrics.append( + ZenoMetric( + name=metric["metric"], + type="mean", + columns=[metric["metric"]], + ) ) - ) project = client.create_project( name=args.project_name + (f"_{task}" if len(tasks) > 1 else ""), view="text-classification", @@ -168,7 +169,11 @@ def generate_dataset( Returns: pd.Dataframe: A dataframe that is ready to be uploaded to Zeno. """ - ids = [x["doc_id"] for x in data] + ids = ( + [x["doc_id"] for x in data] + if not config.get("filter_list") + else [f"{x['doc_id']}.{x['filter']}" for x in data] + ) labels = [x["target"] for x in data] instance = [""] * len(ids) @@ -190,6 +195,7 @@ def generate_dataset( return pd.DataFrame( { "id": ids, + "doc_id": [x["doc_id"] for x in data], "data": instance, "input_len": [len(x) for x in instance], "labels": labels, @@ -208,8 +214,15 @@ def generate_system_df(data, config): Returns: pd.Dataframe: A dataframe that is ready to be uploaded to Zeno as a system. 
""" - ids = [x["doc_id"] for x in data] + ids = ( + [x["doc_id"] for x in data] + if not config.get("filter_list") + else [f"{x['doc_id']}.{x['filter']}" for x in data] + ) system_dict = {"id": ids} + system_dict["doc_id"] = [x["doc_id"] for x in data] + if config.get("filter_list"): + system_dict["filter"] = [x["filter"] for x in data] system_dict["output"] = [""] * len(ids) if config["output_type"] == "loglikelihood": @@ -228,11 +241,10 @@ def generate_system_df(data, config): system_dict["output"] = [str(x["filtered_resps"][0]) for x in data] system_dict["output_length"] = [len(str(x["filtered_resps"][0])) for x in data] - metrics = {} - for metric in config["metric_list"]: - if "aggregation" in metric and metric["aggregation"] == "mean": - metrics[metric["metric"]] = [x[metric["metric"]] for x in data] - + metrics = { + metric["metric"]: [x[metric["metric"]] for x in data] + for metric in config["metric_list"] + } system_dict.update(metrics) system_df = pd.DataFrame(system_dict) return system_df diff --git a/tests/test_tasks.py b/tests/test_tasks.py index fc9bb59df47fe37682b8a1c938d2056efc544390..b70bb81fa3c5ecd0dc6529704487a20e396a9fe6 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -87,7 +87,9 @@ class TestNewTasks: (x[-1].isspace() is False if len(x) > 0 else True) if target_delimiter.isspace() else True - ), "doc_to_text ends in a whitespace and target delimiter also a whitespace" + ), ( + "doc_to_text ends in a whitespace and target delimiter also a whitespace" + ) else: pass