@register_aggregation("brier_score")
def brier_score(items):
    """Aggregate the multi-class Brier score over all scored items.

    Each item is a ``(gold, prediction)`` pair where ``prediction`` is a
    probability vector over the answer choices and ``gold`` is the matching
    reference (a one-hot vector for multiple_choice items; NOTE(review):
    the loglikelihood path appears to pass a scalar index instead — confirm
    upstream before relying on that output type).

    Datasets such as arc_easy mix questions with different numbers of
    choices, so items are bucketed by prediction length before being
    stacked into rectangular arrays; the result is the size-weighted mean
    of the per-bucket Brier scores.
    """
    golds, predictions = zip(*items)

    # Bucket parallel (gold, pred) pairs in ONE dict keyed by choice count,
    # so the gold/pred pairing cannot drift apart (the previous two-dict
    # version relied on both dicts sharing insertion order).
    buckets = defaultdict(list)
    for gold, pred in zip(golds, predictions):
        buckets[len(pred)].append((gold, pred))

    weighted_sum = 0.0
    total_size = 0
    for pairs in buckets.values():
        gold_arr = np.array([gold for gold, _ in pairs])
        pred_arr = np.array([pred for _, pred in pairs])
        # Squared error summed over choices (multi-class Brier score),
        # averaged within the bucket, then weighted by bucket size.
        weighted_sum += np.mean(np.sum((pred_arr - gold_arr) ** 2, axis=1)) * len(
            pairs
        )
        total_size += len(pairs)

    return weighted_sum / total_size


@register_metric(
    metric="brier_score",
    higher_is_better=False,
    output_type=["multiple_choice"],
    aggregation="brier_score",
)
def brier_score_fn(items):  # This is a passthrough function
    """Per-item passthrough: all real work happens in the ``brier_score``
    aggregation above."""
    return items
+ # gold, pred, norm stuff, the original lls, result_dict = { **({"acc": acc} if "acc" in use_metric else {}), **({"f1": (gold, pred)} if "f1" in use_metric else {}), **({"mcc": (gold, pred)} if "mcc" in use_metric else {}), **({"acc_norm": acc_norm} if "acc_norm" in use_metric else {}), **({"exact_match": exact_match} if "exact_match" in use_metric else {}), + **( + # {"brier_score": (gold, prob_norm)} + {"brier_score": [np.eye(len(prob_norm))[gold], prob_norm]} + if "brier_score" in use_metric + else {} + ), } if "acc_mutual_info" in use_metric: diff --git a/lm_eval/evaluator.py b/lm_eval/evaluator.py index 1eb1c618a61d0061d0606794ce36c645ca68ffd2..e6c56aab5f19a83e9c2d47dbdceb7ca192575f6b 100644 --- a/lm_eval/evaluator.py +++ b/lm_eval/evaluator.py @@ -486,18 +486,24 @@ def evaluate( # For unweighted averaging, use: # current_size = 1 + # TODO: Tasks like brier score for individual + # tasks have no stderr since the score is + # itself an aggregation. But it's possible to + # calculate the stderr over groups + all_stderr = [] for metric in [ key for key in metrics.keys() if "_stderr" not in key ]: stderr = "_stderr,".join(metric.split(",")) stderr_score = results[task][stderr] - if isinstance(stderr_score, str): - stderr_score = 0 - var_score = stderr_score**2 - metric_score = results[task][metric] + if stderr_score == "N/A": + var_score = "N/A" + else: + var_score = stderr_score**2 + all_stderr.append(stderr) - all_stderr.append(stderr) + metric_score = results[task][metric] if metric in results[group]: results[group][metric] = ( @@ -505,15 +511,20 @@ def evaluate( + metric_score * current_size ) / (total_size + current_size) # $$s_z^2 = \frac{(n-1) s_x^2 + (m-1) s_y^2}{n+m-1} + \frac{nm(\bar x - \bar y)^2}{(n+m)(n+m-1)}.$$ - results[group][stderr] = ( - (total_size - 1) * results[group][stderr] - + (current_size - 1) * var_score - ) / ( - total_size + current_size - 1 - ) + total_size * current_size / ( - (total_size + current_size) - * (total_size + 
current_size - 1) - ) * (results[group][metric] - metric_score) ** 2 + if var_score == "N/A": + results[group][stderr] = "N/A" + else: + results[group][stderr] = ( + (total_size - 1) * results[group][stderr] + + (current_size - 1) * var_score + ) / ( + total_size + current_size - 1 + ) + total_size * current_size / ( + (total_size + current_size) + * (total_size + current_size - 1) + ) * ( + results[group][metric] - metric_score + ) ** 2 else: results[group][metric] = metric_score results[group][stderr] = var_score diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/README.md b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/README.md new file mode 100644 index 0000000000000000000000000000000000000000..93600ae181761370baf6c9fdce7860af5737a978 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. +- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/_arc_challenge_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/_arc_challenge_alt_yaml new file mode 100644 index 0000000000000000000000000000000000000000..6266a6fa6093283822cc78a51786b701fd3ec3ba --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/_arc_challenge_alt_yaml @@ -0,0 +1,23 @@ +group: + - ai2_arc +dataset_path: ai2_arc +dataset_name: ARC-Challenge +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + 
higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/arc_challenge_alt.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/arc_challenge_alt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..128fec5fddca74fb775d271386bd84f1d6729741 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/arc_challenge_alt.yaml @@ -0,0 +1,10 @@ +group: arc_challenge_alt_ov +task: + - arc_challenge_alt_ov_01 + - arc_challenge_alt_ov_02 + - arc_challenge_alt_ov_03 + - arc_challenge_alt_ov_04 + - arc_challenge_alt_ov_05 + - arc_challenge_alt_ov_06 + - arc_challenge_alt_ov_07 + - arc_challenge_alt_ov_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2fe0438b09abcf22fbdc0a9c49224678b6c0f78 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_01 +task: arc_challenge_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f99170f64a33d11bb3d69394ba4d34696ca9260c --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: 
arc_challenge_alt_ov_01 +task: arc_challenge_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f985df9ec1481335d84975ee8d6fb626f8175a55 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_01 +task: arc_challenge_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4ef10d2c57f874b68b5100f9feaf2011ae36695 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_02 +task: arc_challenge_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05066f224aa22ad20baabbd6733a9e9d34e4c368 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: 
../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_02 +task: arc_challenge_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce04fda3e53ae0bcacd6768c70b42b06e36af0ff --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_02 +task: arc_challenge_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..864a7912bcd4380e3a50526777a890aaf4312d8a --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_03 +task: arc_challenge_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9c2c9d9972bbf935bc6c5627ae870ab6b62f081 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ 
+include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_03 +task: arc_challenge_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e9d03971aac2ca266bcc57f9c623ec0c317ae1a --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_03 +task: arc_challenge_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8061081d5ac1844a197813dab3248f15b5a16b23 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_04 +task: arc_challenge_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91c868a820ded2562a71b434519f453512b419af --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/b.yaml @@ 
-0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_04 +task: arc_challenge_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..985815839e13b103d0fe4c4bd4e15277da94f504 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_04 +task: arc_challenge_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04fccae4d465dce2ebf75c26afa0b310a100e7e8 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_05 +task: arc_challenge_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb2a641933d52af947b8decc3bb49e5c3d46e47e --- /dev/null +++ 
b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_05 +task: arc_challenge_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..757f1c25a6e3d2d9cfd3680f18a611a072ee3fca --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_05 +task: arc_challenge_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3f2535e7ad536f9ee544ad36ecf159f25e511b5 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_06 +task: arc_challenge_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d36f1f1a7698b4ce879d0d3c7b63a877689a698d --- 
/dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_06 +task: arc_challenge_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcbfd04c9b39d183eccdcbfd84a055d02dc4c14c --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_06 +task: arc_challenge_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04f6e74215ce53ae3f02c6f103b6230fe01bb089 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_07 +task: arc_challenge_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/b.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1817632a212d777b4759ae91a407388454bc9ae6 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_07 +task: arc_challenge_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66c376a5903e699940f826e97530a1439a0543db --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_07 +task: arc_challenge_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da9fe33dc7124256246e0d2d627d9e966a90d897 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_08 +task: arc_challenge_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/b.yaml new file mode 
import string
from functools import partial


def _format_letters(alphabet, style, num):
    """Render the first *num* letters of *alphabet* with *style*.

    E.g. style ``"({})"`` yields ``["(a)", "(b)", ...]``.
    """
    return [style.format(letter) for letter in alphabet[:num]]


def doc_to_text_base(alphabet, style, doc):
    """Build the prompt: question line, one line per lettered choice, then
    a trailing ``Answer:`` cue.

    Tab styles (e.g. ``"{}\t"``) already carry their own separator, so no
    extra space is inserted between the letter label and the choice text.
    """
    choices = doc["choices"]["text"]
    letters = _format_letters(alphabet, style, len(choices))

    # "\t" styles embed the separator in the label; otherwise add a space.
    joiner = "{}{}" if "\t" in style else "{} {}"

    lines = ["Question: " + doc["question"]]
    lines += [joiner.format(letter, choice) for letter, choice in zip(letters, choices)]
    lines.append("Answer:")
    return "\n".join(lines)


# Answer format A: the full continuation text only (no letter prefix).
def choice_A(doc):
    return doc["choices"]["text"]


# Answer format B: the letter labels only. A trailing tab separator is
# stripped, since the bare label is the whole continuation.
def choice_B(alphabet, style, doc):
    num = len(doc["choices"]["text"])
    letters = _format_letters(alphabet, style, num)
    if "\t" in style:
        letters = [letter.replace("\t", "") for letter in letters]
    return letters


# Answer format C: letter label followed by the full continuation.
def choice_C(alphabet, style, doc):
    choices = doc["choices"]["text"]
    letters = _format_letters(alphabet, style, len(choices))
    if "\t" not in style:
        # Non-tab styles need an explicit space before the choice text.
        letters = [letter + " " for letter in letters]
    return [letter + choice for letter, choice in zip(letters, choices)]


template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})")
choice_01a = choice_A
choice_01b = partial(choice_B, string.ascii_lowercase, "({})")
choice_01c = partial(choice_C, string.ascii_lowercase, "({})")
template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})")
choice_02a = choice_A
choice_02b = partial(choice_B, string.ascii_lowercase, "{})")
choice_02c = partial(choice_C, string.ascii_lowercase, "{})")
template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.")
choice_03a = choice_A
choice_03b = partial(choice_B, string.ascii_lowercase, "{}.")
choice_03c = partial(choice_C, string.ascii_lowercase, "{}.")
template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t")
choice_04a = choice_A
choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t")
choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t")
template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})")
choice_05a = choice_A
choice_05b = partial(choice_B, string.ascii_uppercase, "({})")
choice_05c = partial(choice_C, string.ascii_uppercase, "({})")
template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})")
choice_06a = choice_A
choice_06b = partial(choice_B, string.ascii_uppercase, "{})")
choice_06c = partial(choice_C, string.ascii_uppercase, "{})")
template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.")
choice_07a = choice_A
choice_07b = partial(choice_B, string.ascii_uppercase, "{}.")
choice_07c = partial(choice_C, string.ascii_uppercase, "{}.")
template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t")
choice_08a = choice_A
choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t")
choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t")
@@ +include: _arc_challenge_alt_yaml +group: arc_challenge_alt_pv +task: arc_challenge_alt_pv_01 +doc_to_text: "{{question}}" +doc_to_decontamination_query: "{{question}}" diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_02.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_02.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c808c137c75cdffdd94d02dbe3ff603873936955 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _arc_challenge_alt_yaml +group: arc_challenge_alt_pv +task: arc_challenge_alt_pv_02 +doc_to_text: "Q: {{question}}\nA:" +doc_to_decontamination_query: "Q: {{question}}\nA:" diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_03.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_03.yaml new file mode 100644 index 0000000000000000000000000000000000000000..657ed98b9598fac2ea03c14c173a94a06006aed1 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _arc_challenge_alt_yaml +group: arc_challenge_alt_pv +task: arc_challenge_alt_pv_03 +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/README.md b/lm_eval/tasks/arc/alternative_worlds/arc_easy/README.md new file mode 100644 index 0000000000000000000000000000000000000000..93600ae181761370baf6c9fdce7860af5737a978 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. 
+- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/_arc_easy_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/_arc_easy_alt_yaml new file mode 100644 index 0000000000000000000000000000000000000000..633826a20488b4afafa2845b68f7f7efee6a37d0 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/_arc_easy_alt_yaml @@ -0,0 +1,24 @@ +group: + - ai2_arc +task: arc_easy +dataset_path: ai2_arc +dataset_name: ARC-Easy +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/arc_easy_alt.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/arc_easy_alt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af4dcf97e69dcb617d256002d9fc34a3f12150b9 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/arc_easy_alt.yaml @@ -0,0 +1,10 @@ +group: arc_easy_alt_ov +task: + - arc_easy_alt_ov_01 + - arc_easy_alt_ov_02 + - arc_easy_alt_ov_03 + - arc_easy_alt_ov_04 + - arc_easy_alt_ov_05 + - arc_easy_alt_ov_06 + - arc_easy_alt_ov_07 + - arc_easy_alt_ov_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/a.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..784262d720f3a8b8b4c0701af42bcac7e8638407 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_01 +task: arc_easy_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29958b6c7a6ff092fd131af4ba32c6869f36d126 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_01 +task: arc_easy_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2bcae1af9db78d27034ded6cab422206e963513 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_01 +task: arc_easy_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..6cb6d7014dbef00d59ae171f907ca20547a9f8a5 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_02 +task: arc_easy_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8afda44992b0acdc0086b15be4098a65bc353ab --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_02 +task: arc_easy_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9a8b71a444b20dcf9f580860490f535292778d8 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_02 +task: arc_easy_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a553e3137f654a4ae842c76a7534697dee3bac64 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_03 +task: arc_easy_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc7e433974d816792757b6ef80f0eb508399d0af --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_03 +task: arc_easy_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd80d2db8e0f2e140a77be9c2ea165883b412fe8 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_03 +task: arc_easy_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..307809e87e28e9964bb697e867f54e23c8be0e12 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_04 +task: arc_easy_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81c88c4513f48ab496fe69172343a889ba8dbfd6 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_04 +task: arc_easy_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c18665d9b2cce38b86b78333b2ba30244438e83 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_04 +task: arc_easy_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..d4393910848eaea2f676c23e5bfbb3c81108c011 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_05 +task: arc_easy_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fa728a1a466e3b6a8a48a0a9426848fb8966e3d --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_05 +task: arc_easy_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb8c3f71bb5d5a8ace69c8ba36d4d377657fbe20 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_05 +task: arc_easy_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..47141d575cd6c9159f7a0be22981851907d7b98c --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_06 +task: arc_easy_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ba20727d682ff47ccf088fd82638f832e031370 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_06 +task: arc_easy_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04c7fc4dd868e556565280e193d42ac7e4ad8116 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_06 +task: arc_easy_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..8bd4168cbe3fcc6788a7e0b1131e0340c8f5a605 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_07 +task: arc_easy_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e797f23afa6805215e69e830c2825edc9d48937 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_07 +task: arc_easy_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..145098cd2912ca3f7e1503c2ee0a8987e2a6477c --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_07 +task: arc_easy_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..84f41d12e14d22df8034c9b5328b812d6fdf4242 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_08 +task: arc_easy_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e30c8acec6af39ea149dfa8f5b99962957d9207 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_08 +task: arc_easy_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c829aa810d309b92daa52883cd13641978e2801 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_easy_alt_yaml +group: arc_easy_alt_ov_08 +task: arc_easy_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/styles.py b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..a4e6134801404f946ee09394659e4439037898f1 --- 
/dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/styles.py @@ -0,0 +1,90 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n".join( + [ + "Question: " + doc["question"], + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return doc["choices"]["text"] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") 
+template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/_arc_easy_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/_arc_easy_alt_yaml new file mode 100644 index 0000000000000000000000000000000000000000..b17b217ec11869216ff0c59308268ba89cc59dc8 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/_arc_easy_alt_yaml @@ -0,0 +1,21 @@ +dataset_path: ai2_arc +dataset_name: ARC-Easy +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + 
aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_01.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_01.yaml new file mode 100644 index 0000000000000000000000000000000000000000..614f39706574b3371e4c9b1795769086d0d919e6 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _arc_easy_alt_yaml +group: arc_easy_alt_pv +task: arc_easy_alt_pv_01 +doc_to_text: "{{question}}" +doc_to_decontamination_query: "{{question}}" diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_02.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_02.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3bc2e7e934148d87b35d8e60f5d9699f79ba1569 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _arc_easy_alt_yaml +group: arc_easy_alt_pv +task: arc_easy_alt_pv_02 +doc_to_text: "Q: {{question}}\nA:" +doc_to_decontamination_query: "Q: {{question}}\nA:" diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_03.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_03.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a108887f573ae9bc7c2dea0df013b32628ffe3ed --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _arc_easy_alt_yaml +group: arc_easy_alt_pv +task: arc_easy_alt_pv_03 +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..b68001e1ae382e7885a057fccece7efbe661cdf8 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml @@ -0,0 +1,13 @@ +dataset_path: EleutherAI/arithmetic +dataset_name: arithmetic_1dc +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: "{{context}}" +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e678c0404c0e286f6f17976ae7d9965af41c830 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml @@ -0,0 +1,8 @@ +group: arithmetic_alt +task: + - arithmetic_alt_00 + - arithmetic_alt_01 + - arithmetic_alt_02 + - arithmetic_alt_03 + - arithmetic_alt_04 + - arithmetic_alt_05 diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/_template_00_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/_template_00_yaml new file mode 100644 index 0000000000000000000000000000000000000000..43b73a88f2c708fe3b1979a495badc7e291c5c1d --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/_template_00_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_00 +group_alias: arithmetic (Style 00) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_00 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_1dc.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..0c989c6fa588f01c51acbdf11d460996476293d9 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_1dc_alt_00 +dataset_name: arithmetic_1dc +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11501c8f2905488fa2bcc0310033cf3fc0195124 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_2da_alt_00 +dataset_name: arithmetic_2da +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2dm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a0d35b183f78d428c314823e906388258b8149e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_2dm_alt_00 +dataset_name: arithmetic_2dm +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0720c833d5e85fbbdce0f90a46615ff828de1f67 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_2ds_alt_00 +dataset_name: arithmetic_2ds +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3da.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..3f6cf88a01037b858da7cc05fa92efa3ccdebec6 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_3da_alt_00 +dataset_name: arithmetic_3da +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a83dffe0e6250e55b7c8d4a60aac08467806f4f3 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_3ds_alt_00 +dataset_name: arithmetic_3ds +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..809998562066b1755ea27889e71a953d6d83370e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_4da_alt_00 +dataset_name: arithmetic_4da +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59a6e5a040e47756a73c5582835d2b2db51c8f39 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_4ds_alt_00 +dataset_name: arithmetic_4ds +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5da.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f172341be3a5291520f08e7785e81df5208206e6 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_5da_alt_00 +dataset_name: arithmetic_5da +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2da655459ad87eff58e242338468c35f5f0427af --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_00_yaml +task: arithmetic_5ds_alt_00 +dataset_name: arithmetic_5ds +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f10c542ca5f14446f18e35bb10466c81fd0f919 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_01 +group_alias: arithmetic (Style 01) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_01 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2764ccb29ae1260a0125278b4a87a0f4de6e372c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_1dc_alt_01 
+dataset_name: arithmetic_1dc +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..753eef6a99f2bb802dcefb3967d1c05f7b047c34 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_2da_alt_01 +dataset_name: arithmetic_2da +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abbbecc408e03b930f0443d028cdeaa369978342 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_2dm_alt_01 +dataset_name: arithmetic_2dm +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f069e2898986d51e806b7827bcb6034d409603f --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_2ds_alt_01 +dataset_name: arithmetic_2ds +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fa481f512ed6799905cf91dba0c146a93729eed --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_3da_alt_01 +dataset_name: arithmetic_3da +task_alias: 3da diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e8d27b4905d97a5d3bca388b275234672c8d3a4 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_3ds_alt_01 +dataset_name: arithmetic_3ds +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5564a98c73aff64a39d518d0d7c7b29a39b49464 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_4da_alt_01 +dataset_name: arithmetic_4da +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..765400b66876775e0e1b94e1f95b80cd0d2aa0cc --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_4ds_alt_01 +dataset_name: arithmetic_4ds +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44548858f9742edbe1c4fa31545d5c2d269ce3be --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_5da_alt_01 +dataset_name: arithmetic_5da +task_alias: 5da diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..571bc80dec4069aaeb5c82960e3f5993f7786a02 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_5ds_alt_01 +dataset_name: arithmetic_5ds +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e2a3f5ca24266a13c0c63f3de573eb125b95d4c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_02 +group_alias: arithmetic (Style 02) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_02 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d3184dad9e6f9662348d29b1a4032b119fdcf73 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_1dc_alt_02 +dataset_name: arithmetic_1dc +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..2a7a9e59776f2b11ca2dfcfb2df2b1ca7e1a8a8a --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_2da_alt_02 +dataset_name: arithmetic_2da +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30c38a6d376a8d220028177d6aed51f853e763c2 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_2dm_alt_02 +dataset_name: arithmetic_2dm +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab18bce0abcd7ac63600879c566a98d2116f60bc --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_2ds_alt_02 +dataset_name: arithmetic_2ds +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e59c3798dbd3f0c36c1899d2a5919ff23bfef509 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_3da_alt_02 +dataset_name: arithmetic_3da +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..6d37ef2a52ff3cb3510588b2047cdb71293b0a86 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_3ds_alt_02 +dataset_name: arithmetic_3ds +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eabbb1a0ecb8318c6a01fdb6efe27b1e7caf3c6e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_4da_alt_02 +dataset_name: arithmetic_4da +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ce2bb7984b3aa7298922f62b8e5a6f4fa0c98a6 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_4ds_alt_02 +dataset_name: arithmetic_4ds +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48ef1aaa6cd2ccf0c266faa29e89212c6077534d --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_5da_alt_02 +dataset_name: arithmetic_5da +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1941ff65bf77dbd2fcef1fb0d8a8f809563298c7 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_5ds_alt_02 +dataset_name: arithmetic_5ds +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml new file mode 100644 index 0000000000000000000000000000000000000000..275559d18b812374402f0f16a34cd7bdfc173001 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_03 +group_alias: arithmetic (Style 03) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_03 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c68279f4543a09551a9c0aff79a1c120995d053c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_1dc_alt_03 +dataset_name: arithmetic_1dc +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ef30ee79fc4df539806524ca88bda0e65f34d7b --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_2da_alt_03 
+dataset_name: arithmetic_2da +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f742d37bd1146a3ab1c5535eb84615f420e3d84f --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_2dm_alt_03 +dataset_name: arithmetic_2dm +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59a85bfbb43ec27b74554748741887db8879e5d0 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_2ds_alt_03 +dataset_name: arithmetic_2ds +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4e4d77250121ab64737dcf26a39c4873d61efea --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_3da_alt_03 +dataset_name: arithmetic_3da +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7022a855ea8054ee3faad12eb0f5b85380932f93 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_3ds_alt_03 +dataset_name: arithmetic_3ds +task_alias: 3ds diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d70f4819c3206b3732d03133325ba8a603492d5 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_4da_alt_03 +dataset_name: arithmetic_4da +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77cd5c6553482994b6b6d8d9c930c4b319de38bc --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_4ds_alt_03 +dataset_name: arithmetic_4ds +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02aec1d145e3c2e1c0e7b583d3e3cfe197c1b8f0 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_5da_alt_03 +dataset_name: arithmetic_5da +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6bbbd3a1021c131c909c7f6c0ccad19085926aae --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_5ds_alt_03 +dataset_name: arithmetic_5ds +task_alias: 5ds diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml new file mode 100644 index 0000000000000000000000000000000000000000..6680afad36135ecbc6c521856ce0d76b33a515c4 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_04 +group_alias: arithmetic (Style 04) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_04 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d940e1c16d9b866bdbee47ebe11632afc1ff126 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_1dc_alt_04 +dataset_name: arithmetic_1dc +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0627fe5e574c13fc6d296b9967763c7910f0620f --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_2da_alt_04 +dataset_name: arithmetic_2da +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..667259c891be338a6e8f132ebf6031cca1d9c791 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_2dm_alt_04 +dataset_name: arithmetic_2dm +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc45ee632da1bf6499a7d3523e1faca63ae29ecb --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_2ds_alt_04 +dataset_name: arithmetic_2ds +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d1468745c175d51ea54b9dc470d75b994e8e634a --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_3da_alt_04 +dataset_name: arithmetic_3da +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d67daf131f354163a30972871991d25a6ac99e3f --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_3ds_alt_04 +dataset_name: arithmetic_3ds +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..339c47f4adea91ec516ff3fd22c92060d48fb405 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_4da_alt_04 +dataset_name: arithmetic_4da +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c983c896d875c0c0383f323bfc93618024ab145e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_4ds_alt_04 +dataset_name: arithmetic_4ds +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e9cbf0fae3e96b7eb2db870a25d61888ab0b4b5 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_5da_alt_04 +dataset_name: arithmetic_5da +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dca4bf981b5091acfbe854052ad6708150e2fe64 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_5ds_alt_04 +dataset_name: arithmetic_5ds +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f838a10eb5dc85fede17f35f3f38ec99e942dfe1 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_05 +group_alias: arithmetic (Style 05) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_05 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c0f4cec2f941d216d2d74cb6f99902760ca303c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_1dc_alt_05 +dataset_name: arithmetic_1dc +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94632d237f2e6a8c7ae85597407c223b980b4427 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_2da_alt_05 +dataset_name: arithmetic_2da +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df44aa6853920d52db0707cc807659d95dc4323d --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_2dm_alt_05 
+dataset_name: arithmetic_2dm +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d34f87d157fd264cab04644f93c82519bbb14b15 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_2ds_alt_05 +dataset_name: arithmetic_2ds +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bbd6b9ed694c03aae046c3fee4108aebad782b9 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_3da_alt_05 +dataset_name: arithmetic_3da +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32d15f7256ce9388855794fef30e73ae8f22afb4 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_3ds_alt_05 +dataset_name: arithmetic_3ds +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09b7fda5a1b217e0904cd7189816944308e0bf02 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_4da_alt_05 +dataset_name: arithmetic_4da +task_alias: 4da diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d4e4c493b776d9f24b070afa477de370385db03 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_4ds_alt_05 +dataset_name: arithmetic_4ds +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12b88bc3ea1680a28f84758f6c553252b384fbdc --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_5da_alt_05 +dataset_name: arithmetic_5da +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..406926a2dd435da486b963ae31f996c6276776fa --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_5ds_alt_05 +dataset_name: arithmetic_5ds +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/utils.py b/lm_eval/tasks/arithmetic/alternative_worlds/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..56ad74ace6f976f773112743b357289f2530c133 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/utils.py @@ -0,0 +1,36 @@ +import re + +# Original Prompt +# Question: What is (9 + 8) * 2? Answer: + + +def style_00(docs): + # Question: What is (9 + 8) * 2? Answer:  (context returned unchanged) + return docs["context"] + + +def style_01(docs): + # What is (9 + 8) * 2? 
+ return docs["context"].replace("Question: ", "").replace(" Answer:", "") + + +def style_02(docs): + # Q: What is (9 + 8) * 2? A: + return docs["context"].replace("Question: ", "Q: ").replace(" Answer:", " A:") + + +def style_03(docs): + # Solve (9 + 8) * 2. + return ( + docs["context"].replace("Question: What is", "Solve").replace("? Answer:", ".") + ) + + +def style_04(docs): + # (9 + 8) * 2 = + return docs["context"].replace("Question: What is ", "").replace("? Answer:", " =") + + +def style_05(docs): + # What is (9 + 8) * 2? Answer: + return docs["context"].replace("Question: ", "") diff --git a/lm_eval/tasks/bbh/alternative_worlds/README.md b/lm_eval/tasks/bbh/alternative_worlds/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b1412fa36ec1dd0945f5d0ed39be54bbddf4fef9 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/README.md @@ -0,0 +1,34 @@ +| Task | Prompt Variation | Output Variation | Option in Sample | +| :-----------------:| :---------------: | :---------------: |:---------------: | +| boolean_expression | Yes | Yes | No | +| causal_judgement | Yes | Yes | Yes | +| date_understanding | Yes | Yes | Yes | +| disambiguation_qa | Yes | Yes | Yes | +| dyck_languages | Yes | No | No | +| formal_fallacies | Yes | Yes | Yes | +| geometric_shapes | Yes | Yes | Yes | +| hyperbaton | Yes | Yes | Yes | +| logical_deduction_five_objects| Yes | Yes | Yes | +| logical_deduction_seven_objects| Yes | Yes | Yes | +| logical_deduction_three_objects| Yes | Yes | Yes | +| movie_recommendation| Yes | Yes | Yes | +| multistep_arithmetic_two| Yes | No | No | +| navigate | Yes | Yes | Yes | +| object_counting | Yes | No | No | +| penguins_in_a_table| Yes | Yes | Yes | +| reasoning_about_colored_objects| Yes | Yes | Yes | +| ruin_names | Yes | Yes | Yes | +| salient_translation_error_detection| Yes| Yes | Yes | +| snarks | Yes | Yes | Yes | +| sports_understanding| Yes | Yes | No | +| temporal_sequences | Yes | Yes | Yes | +| 
tracking_shuffled_objects_five_objects| Yes| Yes | Yes | +| tracking_shuffled_objects_seven_objects| Yes| Yes | Yes | +| tracking_shuffled_objects_three_objects| Yes| Yes | Yes | +| web_of_lies | Yes | Yes | No | +| word_sorting | Yes | No | No | + + +Notes: +- `web_of_lies` already starts with `Question: ` +- Tasks with options are `Options: (A) ...` (multiple choice) or `Options: - ...` (binary choice) diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml new file mode 100644 index 0000000000000000000000000000000000000000..023037f6a1d8c92e31f452a38825c720086d5df8 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml @@ -0,0 +1,5 @@ +group: bbh_alt_pv_zeroshot +task: + - bbh_alt_pv_01_zeroshot + - bbh_alt_pv_02_zeroshot + - bbh_alt_pv_03_zeroshot diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..817b2c277d7317c2c825f0d8031a84bab6a5b64a --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml @@ -0,0 +1,12 @@ +group: bbh_alt_pv_01_zeroshot +dataset_path: lukaemon/bbh +output_type: multiple_choice +test_split: test +doc_to_text: !function ../../styles.styles_01 +doc_to_target: !function ../../styles.doc_to_target +doc_to_choice: !function ../../styles.doc_to_choice +num_fewshot: 0 +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..aae4bfc1125a1f5cdf4ef730455cd75e620bdc7a --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "boolean_expressions" +"description": "Evaluate the result of a random Boolean expression.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_boolean_expressions" +"doc_to_target": target +"doc_to_choice": ["True", "False"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9dc5dfc34ec37542592cff963042c7a7ba4b10c2 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml @@ -0,0 +1,4 @@ +"dataset_name": "causal_judgement" +"description": "Answer questions about causal attribution.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_causal_judgement" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cb5eef6474dc442a5631e65f5a3d2600c913263 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml @@ -0,0 +1,4 @@ +"dataset_name": "date_understanding" +"description": "Infer the date from context.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_date_understanding" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..df69dd71706865b85a4746e503e0aa697cc91315 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml @@ -0,0 +1,4 @@ +"dataset_name": "disambiguation_qa" +"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_disambiguation_qa" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/formal_fallacies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/formal_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34f384cd4933fc4ac68911f446127a37b8d2cc89 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/formal_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_fallacies" +"description": "Distinguish deductively valid arguments from formal fallacies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_formal_fallacies" +"doc_to_target": target +"doc_to_choice": ["valid", "invalid"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/geometric_shapes.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/geometric_shapes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a5d175ea44fef05a6267daab5505d32fc6b5594 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/geometric_shapes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "geometric_shapes" +"description": "Name geometric shapes from their SVG paths.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_geometric_shapes" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/hyperbaton.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/hyperbaton.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..c51a7b933dabfde6bc6cc0d54c25a109b3958b4c --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/hyperbaton.yaml @@ -0,0 +1,4 @@ +"dataset_name": "hyperbaton" +"description": "Order adjectives correctly in English sentences.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_hyperbaton" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_five_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e970490696b90dde1242e3c38d6203efaf68f5e6 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_five_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_logical_deduction_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_seven_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c33460c9bf600f3a3138945f376e8f53954e6112 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_seven_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_logical_deduction_seven_objects" diff --git 
a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_three_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2654f3a8feeef208ecf101a82593034fde072ec2 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_three_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_logical_deduction_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/movie_recommendation.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/movie_recommendation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f71fcc096093d11b64222645e0762c4783374262 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/movie_recommendation.yaml @@ -0,0 +1,5 @@ +"dataset_name": "movie_recommendation" +"description": "Recommend movies similar to the given list of movies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_movie_recommendation" +"process_docs": !function ../../utils.fix_movie_recommendation diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/navigate.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/navigate.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebd0a5b3c99212dc587d85d5d25fef81f8fb765e --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/navigate.yaml @@ -0,0 +1,4 @@ +"dataset_name": "navigate" +"description": "Given a series of navigation instructions, determine whether one 
would end up back at the starting point.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_navigate" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/penguins_in_a_table.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/penguins_in_a_table.yaml new file mode 100644 index 0000000000000000000000000000000000000000..490e45f6e3539595849870984a011409028dab46 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/penguins_in_a_table.yaml @@ -0,0 +1,4 @@ +"dataset_name": "penguins_in_a_table" +"description": "Answer questions about a table of penguins and their attributes.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_penguins_in_a_table" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/reasoning_about_colored_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/reasoning_about_colored_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..edbbb0921c4edb2f2ae49b5ab91b76702d99f5b8 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/reasoning_about_colored_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "reasoning_about_colored_objects" +"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_reasoning_about_colored_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/ruin_names.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/ruin_names.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e391c296e387593550a649c3963f974829be2253 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/ruin_names.yaml @@ -0,0 +1,5 @@ +"dataset_name": "ruin_names" 
+"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_ruin_names" +"process_docs": !function ../../utils.fix_ruin_names diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/salient_translation_error_detection.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/salient_translation_error_detection.yaml new file mode 100644 index 0000000000000000000000000000000000000000..40980d8329784453c8affee945d7782169968fff --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/salient_translation_error_detection.yaml @@ -0,0 +1,4 @@ +"dataset_name": "salient_translation_error_detection" +"description": "Detect the type of error in an English translation of a German source sentence.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_salient_translation_error_detection" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/snarks.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/snarks.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe58da5d3c19b5f11d81c0506390aa5b68200088 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/snarks.yaml @@ -0,0 +1,4 @@ +"dataset_name": "snarks" +"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_snarks" diff --git 
a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ed32a13ac5e3d16fc4830f0e5ae26291883fac7 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sports_understanding" +"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_sports_understanding" +"doc_to_target": target +"doc_to_choice": ["yes", "no"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60a5069d47f16a3f653313fc2ada0c3314ee6359 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml @@ -0,0 +1,4 @@ +"dataset_name": "temporal_sequences" +"description": "Task description: Answer questions about which times certain events could have occurred.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_temporal_sequences" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4df1d2fe69b40f24e42a662369ec440088394cf1 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"tracking_shuffled_objects_five_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c710c67135bd78ef8e2b6c49e796cb8fd35d5d5 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_seven_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ca4794216a7a0f9b8bbf6e9e896eed06b438ca9 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_three_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": 
"bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9dc118aed8bd23b4b034243c104574878bde436 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "web_of_lies" +"description": "Evaluate a random boolean function expressed as a word problem.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_web_of_lies" +"doc_to_target": target +"doc_to_choice": ["Yes", "No"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ee7bf709cfc8dc11cd1bc204789f7344e49e0fe --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml @@ -0,0 +1,12 @@ +group: bbh_alt_pv_02_zeroshot +dataset_path: lukaemon/bbh +output_type: multiple_choice +test_split: test +doc_to_text: !function ../../styles.styles_02 +doc_to_target: !function ../../styles.doc_to_target +doc_to_choice: !function ../../styles.doc_to_choice +num_fewshot: 0 +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..383566b1bd652d12b33486c684f6133bea1650b5 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "boolean_expressions" +"description": "Evaluate the result of a random Boolean expression.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_boolean_expressions" +"doc_to_target": target +"doc_to_choice": ["True", "False"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5eba635dbd86d76f639cafbaafad7dbc1c02ff84 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml @@ -0,0 +1,4 @@ +"dataset_name": "causal_judgement" +"description": "Answer questions about causal attribution.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_causal_judgement" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e5a3e93953c98df4b266ee94499aa4f78729bc3 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml @@ -0,0 +1,4 @@ +"dataset_name": "date_understanding" +"description": "Infer the date from context.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_date_understanding" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6d8c345d8b6887129716d46ba85370cefb8a34b --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"disambiguation_qa" +"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_disambiguation_qa" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f0b1975f081b4fd42a8a72ffbce0450ed09baef --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_fallacies" +"description": "Distinguish deductively valid arguments from formal fallacies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_formal_fallacies" +"doc_to_target": target +"doc_to_choice": ["valid", "invalid"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..228567bb5c1a56a0d0696f6ec653501c8d4dde3b --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "geometric_shapes" +"description": "Name geometric shapes from their SVG paths.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_geometric_shapes" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f79fa0ce463bceaaf061269cd95e052fdda163fb --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml @@ -0,0 +1,4 @@ +"dataset_name": "hyperbaton" 
+"description": "Order adjectives correctly in English sentences.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_hyperbaton" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bfdfdace0cbc3a3ec7a7830718273ef0880719f1 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_five_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_logical_deduction_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bcca9b80eb34fa654d24d7722a3b69ffeb3f6b2e --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_seven_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_logical_deduction_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..327eea74a0f66bb009a44b3d618216187d534a74 --- 
/dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_three_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_logical_deduction_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ac66b5f4487930d55788638a8bae22f133f59bd --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml @@ -0,0 +1,5 @@ +"dataset_name": "movie_recommendation" +"description": "Recommend movies similar to the given list of movies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_movie_recommendation" +"process_docs": !function ../../utils.fix_movie_recommendation diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d64421029ce0c996a7eb6d434d0df42ff1368006 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml @@ -0,0 +1,4 @@ +"dataset_name": "navigate" +"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_navigate" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..186526daa8e36e2bd452cf837070ca8f58a95e60 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml @@ -0,0 +1,4 @@ +"dataset_name": "penguins_in_a_table" +"description": "Answer questions about a table of penguins and their attributes.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_penguins_in_a_table" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/reasoning_about_colored_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/reasoning_about_colored_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25123984bc903f0dfa0a95da6d302c1bcad7e971 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/reasoning_about_colored_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "reasoning_about_colored_objects" +"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_reasoning_about_colored_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb5b7b525f1f446f2f6c32e958202563e9e59516 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml @@ -0,0 +1,5 @@ +"dataset_name": "ruin_names" +"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_ruin_names" +"process_docs": !function ../../utils.fix_ruin_names diff --git 
a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/salient_translation_error_detection.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/salient_translation_error_detection.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85d8fb99e06da579d4e80038b3bc34c2c5a9eff2 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/salient_translation_error_detection.yaml @@ -0,0 +1,4 @@ +"dataset_name": "salient_translation_error_detection" +"description": "Detect the type of error in an English translation of a German source sentence.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_salient_translation_error_detection" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/snarks.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/snarks.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9889128f1581dabfd50555652784cc9951a017cf --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/snarks.yaml @@ -0,0 +1,4 @@ +"dataset_name": "snarks" +"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_snarks" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/sports_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/sports_understanding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7c29ba44794fdb1236b1992cce675d2207530b6 --- 
/dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/sports_understanding.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sports_understanding" +"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_sports_understanding" +"doc_to_target": target +"doc_to_choice": ["yes", "no"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/temporal_sequences.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/temporal_sequences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74b748f55a950f5f2abaaad35831aec08b6150e1 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/temporal_sequences.yaml @@ -0,0 +1,4 @@ +"dataset_name": "temporal_sequences" +"description": "Task description: Answer questions about which times certain events could have occurred.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_temporal_sequences" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_five_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9c00352b70e61c8e7f688095d3dc4bd13b8bcd1 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_five_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_five_objects" diff --git 
a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_seven_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89a5b7851c057903a125dfe7ffab03753ad71233 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_seven_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_three_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6cb6b89acdbac5559a105e2eb89e273054bc791c --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_three_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/web_of_lies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/web_of_lies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0be43b365625d440c66e37fc33af53044d31e538 --- /dev/null 
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/web_of_lies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "web_of_lies" +"description": "Evaluate a random boolean function expressed as a word problem.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_web_of_lies" +"doc_to_target": target +"doc_to_choice": ["Yes", "No"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/_zeroshot_template_yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/_zeroshot_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e492bafcfa7911a2d283640281a6f9960c9f0e3 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/_zeroshot_template_yaml @@ -0,0 +1,12 @@ +group: bbh_alt_pv_03_zeroshot +dataset_path: lukaemon/bbh +output_type: multiple_choice +test_split: test +doc_to_text: !function ../../styles.styles_03 +doc_to_target: !function ../../styles.doc_to_target +doc_to_choice: !function ../../styles.doc_to_choice +num_fewshot: 0 +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/boolean_expressions.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/boolean_expressions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27e9b1a418cbbf568d9479fc53c1f31b6c667254 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/boolean_expressions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "boolean_expressions" +"description": "Evaluate the result of a random Boolean expression.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_boolean_expressions" +"doc_to_target": target +"doc_to_choice": ["True", "False"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/causal_judgement.yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/causal_judgement.yaml new file mode 100644 index 0000000000000000000000000000000000000000..824ec01c0fef793b21acb2fb06ddc1a44686b73f --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/causal_judgement.yaml @@ -0,0 +1,4 @@ +"dataset_name": "causal_judgement" +"description": "Answer questions about causal attribution.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_causal_judgement" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/date_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/date_understanding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a93686c0781e327d02bcd9e1cce70b49d6bfbb0 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/date_understanding.yaml @@ -0,0 +1,4 @@ +"dataset_name": "date_understanding" +"description": "Infer the date from context.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_date_understanding" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/disambiguation_qa.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/disambiguation_qa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc4324e4822233c62667a16434fbc073a10c1db6 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/disambiguation_qa.yaml @@ -0,0 +1,4 @@ +"dataset_name": "disambiguation_qa" +"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_disambiguation_qa" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/formal_fallacies.yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/formal_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42d8abcc4cff65e52e7072ce519d346abb7f2de9 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/formal_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_fallacies" +"description": "Distinguish deductively valid arguments from formal fallacies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_formal_fallacies" +"doc_to_target": target +"doc_to_choice": ["valid", "invalid"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/geometric_shapes.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/geometric_shapes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..719db41efabfb06994b32c8ec7288330cf9be323 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/geometric_shapes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "geometric_shapes" +"description": "Name geometric shapes from their SVG paths.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_geometric_shapes" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/hyperbaton.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/hyperbaton.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7905a343478a20fa4a5d0ef2fdb4640da14d06fe --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/hyperbaton.yaml @@ -0,0 +1,4 @@ +"dataset_name": "hyperbaton" +"description": "Order adjectives correctly in English sentences.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_hyperbaton" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_five_objects.yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_five_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14566a024477ee0f7856caa228c4fc04849e50f3 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_five_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_logical_deduction_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_seven_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51ba44c876dd5bcca1ab275015db7655875d53b7 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_seven_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_logical_deduction_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_three_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bca7b49cebee3ffacb41fde23ba5b4eedac5ce8e --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_three_objects" +"description": "A logical deduction task which requires deducing the order of a 
sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_logical_deduction_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e1f2ecb27b0b69ede381513eefb350ac8d91dd4 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml @@ -0,0 +1,5 @@ +"dataset_name": "movie_recommendation" +"description": "Recommend movies similar to the given list of movies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_movie_recommendation" +"process_docs": !function ../../utils.fix_movie_recommendation diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/navigate.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/navigate.yaml new file mode 100644 index 0000000000000000000000000000000000000000..393e364ee5db9846af0595df00251781f63d33d5 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/navigate.yaml @@ -0,0 +1,4 @@ +"dataset_name": "navigate" +"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_navigate" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71adb34b8f1e0b6d3e363c3d3d1e1a1920c6e5c5 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"penguins_in_a_table" +"description": "Answer questions about a table of penguins and their attributes.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_penguins_in_a_table" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1ddd9a4abdc0b6a23a7a194c3e0834d9c3819ce --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "reasoning_about_colored_objects" +"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_reasoning_about_colored_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e13bb3e6cb4329607a387a46680a222c5ad46744 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml @@ -0,0 +1,5 @@ +"dataset_name": "ruin_names" +"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_ruin_names" +"process_docs": !function ../../utils.fix_ruin_names diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86db045bc54327b0a03c1ea9c57807a04ca8db53 --- /dev/null 
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml @@ -0,0 +1,4 @@ +"dataset_name": "salient_translation_error_detection" +"description": "Detect the type of error in an English translation of a German source sentence.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_salient_translation_error_detection" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8f2ca995cd3048ad7b13f4433d6744063edcec5 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml @@ -0,0 +1,4 @@ +"dataset_name": "snarks" +"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_snarks" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb3e70d6a278e73a678d9c0ec4ea48bee3769af5 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sports_understanding" +"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n" +"include": "_zeroshot_template_yaml" +"task": 
"bbh_alt_pv_03_zeroshot_sports_understanding" +"doc_to_target": target +"doc_to_choice": ["yes", "no"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bafde6b7e86d0a0b217088e23d5084ac6b39dc89 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml @@ -0,0 +1,4 @@ +"dataset_name": "temporal_sequences" +"description": "Task description: Answer questions about which times certain events could have occurred.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_temporal_sequences" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bec899a4485efa9280502b083814c5dea42e64b9 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_five_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0150b3216e7912daf15eddc25433a83c5e4cb0cd --- 
/dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_seven_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e08ead90e9e8cf3e0308cd5e706889bd1051aaa --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_three_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12ff9bc3ece38df08f7f34faae40e8f0831aa71f --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "web_of_lies" +"description": "Evaluate a random boolean function expressed as a word problem.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_web_of_lies" +"doc_to_target": target 
+"doc_to_choice": ["Yes", "No"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..ae872179764e1a8c71be41b39750e4bcb2461a67 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py @@ -0,0 +1,72 @@ +import re +import string + +yes_no = ["Yes", "No"] + + +def parse_choices(doc): + + input_text = doc["input"] + choice_string = input_text.split("Options:") + if len(choice_string) == 2: + choice_string = choice_string[-1] + if ("- Yes" in choice_string) and ("- No" in choice_string): + choices = yes_no + else: + choices = [ + c[4:].rstrip("\n") + for c in re.findall(r"\([A-Z]\) .*?\n|\([A-Z]\) .*?$", choice_string) + ] + return choices + else: + return [] + + +def styles_01(doc): + # Check for choices and remove them + choices = parse_choices(doc) + if choices != []: + doc_to_text = doc["input"].split("Options:")[0] + if doc_to_text[-1] in ["\n", " "]: + doc_to_text = doc_to_text[:-1] + else: + doc_to_text = doc["input"] + return doc_to_text + + +def styles_02(doc): + # Check for choices and remove them + doc_to_text = styles_01(doc) + return "Q: " + doc_to_text + "\nA:" + + +def styles_03(doc): + # Check for choices and remove them + doc_to_text = styles_01(doc) + return "Question: " + doc_to_text + "\nAnswer:" + + +def doc_to_choice(doc): + return parse_choices(doc) + + +def doc_to_target(doc): + target = doc["target"] + try: + if target in ["Yes", "No"]: + return yes_no.index(target) + else: + return string.ascii_uppercase.index(target[1:-1]) + # else: + # return parse_choices(doc).index(target) + + except Exception as err: + print("Full Doc") + print(doc) + print("Choices") + print(parse_choices(doc)) + print("Error") + print(err) + import sys + + sys.exit() diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/utils.py 
# Hand-patched replacements for known-bad rows in the upstream BBH data
# (e.g. line 163 of `movie_recommendation`). Keyed by the broken target
# string; value is the corrected (input, target) pair.

_MOVIE_RECOMMENDATION_PATCHES = {
    "Monsters, Inc": (
        "Find a movie similar to Minority Report, Shrek, Catch Me If You Can, Aladdin:\nOptions:\n(A) Monsters, Inc\n(B) Children of the Night\n(C) The Incredible Shrinking Man\n(D) Town & Country",
        "(A)",
    ),
}

_RUIN_NAMES_PATCHES = {
    "dearth, wind, & fire": (
        "Which of the following is a humorous edit of this artist or movie name: 'earth, wind, & fire'?\nOptions:\n(A) eareth, wind, & fire\n(B) earth, bind, & fire\n(C) earthm wind, & fire\n(D) dearth, wind, & fire",
        "(D)",
    ),
    "rita, sue and bob poo": (
        "Which of the following is a humorous edit of this artist or movie name: 'rita, sue and bob too'?\nOptions:\n(A) rita, sue and bob too\n(B) rita, sue and bob poo\n(C) rita, sue and box too\n(D) rita,y sue and bob too",
        "(B)",
    ),
}


def _apply_patches(data, patches):
    """Map over *data*, replacing input/target of docs whose target is in *patches*."""

    def _fix(doc):
        patch = patches.get(doc["target"])
        if patch is not None:
            doc["input"], doc["target"] = patch
        return doc

    return data.map(_fix)


def fix_movie_recommendation(data):
    """Repair the broken `movie_recommendation` row; returns the mapped dataset."""
    return _apply_patches(data, _MOVIE_RECOMMENDATION_PATCHES)


def fix_ruin_names(data):
    """Repair the two broken `ruin_names` rows; returns the mapped dataset."""
    return _apply_patches(data, _RUIN_NAMES_PATCHES)
# NOTE(review): loaded at import time — pulls the tokenizer from the HF hub
# on first use. Change this tokenizer to fit the model being evaluated.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-2.8b", max_new_tokens=128)


def token_edit_distance(references, predictions, **kwargs):
    """Levenshtein distance between token-id sequences.

    Only the first reference and first prediction are compared, matching
    the harness's per-doc metric call convention. Extra keyword arguments
    from the harness are accepted and ignored.

    Returns:
        int: edit distance between the two encoded token-id lists.
    """
    # Removed leftover debug print() calls that spammed stdout on every
    # metric invocation.
    ref_tokens = tokenizer.encode(references[0])
    pred_tokens = tokenizer.encode(predictions[0])
    return levenshtein.distance(ref_tokens, pred_tokens)
--- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/hellaswag_alt.yaml @@ -0,0 +1,10 @@ +group: hellaswag_alt +task: + - hellaswag_01 + - hellaswag_02 + - hellaswag_03 + - hellaswag_04 + - hellaswag_05 + - hellaswag_06 + - hellaswag_07 + - hellaswag_08 diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db54afc016b51d213099389f517f191e66ed878f --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_01 +group_alias: style_01 +task: hellaswag_01a +task_alias: a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c5890ca93686c4798a8a463e94d08a144c01262 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_01 +group_alias: style_01 +task: hellaswag_01b +task_alias: b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e7d61eb38fbdcd249e04560bdd9f7f05127eb9f --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_01 +group_alias: style_01 +task: hellaswag_01c +task_alias: c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml 
b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bae1a7c4977fb91de5ff40e62bb4011fdc6ab385 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_02 +group_alias: style_02 +task: hellaswag_02a +task_alias: a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b808d99ba3a43f7b8521aad50dbfd19d07d34707 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_02 +group_alias: style_02 +task: hellaswag_02b +task_alias: b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3fff20f2fe001e115724f8abb9bdbfe50d712a6e --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_02 +group_alias: style_02 +task: hellaswag_02c +task_alias: c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b87ae7136a35f8b11bd3b69d630aa23050cc0ea --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_03 +group_alias: style_03 +task: hellaswag_03a 
+task_alias: a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c93ffb32c12a61d71ed049c34d2ec6264cc024a --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_03 +group_alias: style_03 +task: hellaswag_03b +task_alias: b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f33ccb84b659619d7c8dbe7de2779b8c517d2d62 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_03 +group_alias: style_03 +task: hellaswag_03c +task_alias: c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb5bd68f3c14b8e45072f8572e800156d534841b --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_04 +group_alias: style_04 +task: hellaswag_04a +task_alias: a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a1e6156cc6a68cff8ff5fd67c5b2ee3faadc449 --- /dev/null 
+++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_04 +group_alias: style_04 +task: hellaswag_04b +task_alias: b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a30c7ab0a643576188d719a1b1f189f9c1f0d88 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_04 +group_alias: style_04 +task: hellaswag_04c +task_alias: c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd7b773ba9bd237ab4eab9338c1ecc5c131a438c --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_05 +group_alias: style_05 +task: hellaswag_05a +task_alias: a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4cbce6bdec683b25e3faa75d73d1937ed44df9b9 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_05 +group_alias: style_05 +task: hellaswag_05b +task_alias: b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml 
b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca7e8a21e8c97735df8f632d6ff275ef349115b1 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_05 +group_alias: style_05 +task: hellaswag_05c +task_alias: c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..95a007b0bce2b0f562194e6b207e0c04c98de126 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_06 +group_alias: style_06 +task: hellaswag_06a +task_alias: a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3df5d9420c41f4bc7cf94e5698b4a05f9e457624 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_06 +group_alias: style_06 +task: hellaswag_06b +task_alias: b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a690f6e7bad1a0a68e13376b2e7948b09fcf31db --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_06 +group_alias: style_06 +task: hellaswag_06c 
+task_alias: c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d3d7329ce240c1b7f43ccc25aab33baf5b0cd7e --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_07 +group_alias: style_07 +task: hellaswag_07a +task_alias: a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3764e9e5f63378d7b3ff007efd03687540b0ef81 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_07 +group_alias: style_07 +task: hellaswag_07b +task_alias: b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9be6f58c82638e7fcd746a5264a20e491a6d6b83 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_07 +group_alias: style_07 +task: hellaswag_07c +task_alias: c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7d5e465699c0fc3db843a0dab8fd25f655b0715 --- /dev/null 
+++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_08 +group_alias: style_08 +task: hellaswag_08a +task_alias: a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7efccdd152b56d15aa16f37cc97cf10fa7af5447 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_08 +group_alias: style_08 +task: hellaswag_08b +task_alias: b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4642a3c492a635c42fb694a082b2b3aaa878c2c1 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_08 +group_alias: style_08 +task: hellaswag_08c +task_alias: c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/styles.py b/lm_eval/tasks/hellaswag/alternative_worlds/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..4aa8c268ec4faf5be07e848ebe79ecd56af0621e --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/styles.py @@ -0,0 +1,90 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = doc["choices"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + 
doc_to_text = "\n\n".join( + [ + doc["query"] + "...", + " What is the most appropriate continuation?", + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return doc["choices"] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = doc["choices"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = doc["choices"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, doc["choices"])] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = 
partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/hellaswag/hellaswag.yaml b/lm_eval/tasks/hellaswag/hellaswag.yaml index ec627da7d46ea6f31bd0ca68c60e21fd9332db9d..bb8bd8e2b831147272395e6d7ab64bb1c7e177bf 100644 --- a/lm_eval/tasks/hellaswag/hellaswag.yaml +++ b/lm_eval/tasks/hellaswag/hellaswag.yaml @@ -1,7 +1,7 @@ group: - multiple_choice task: hellaswag -dataset_path: hellaswag +dataset_path: Rowan/hellaswag dataset_name: null output_type: multiple_choice training_split: train diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/README.md b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6f89dda2535411070c5f2a67b0af68d53f37fe6a --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/README.md @@ -0,0 +1,18 @@ + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. 
+- a\t +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/_mathqa_alt_yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/_mathqa_alt_yaml new file mode 100644 index 0000000000000000000000000000000000000000..cca783409e94450515b1ae36bb1fa3eefb01e4d8 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/_mathqa_alt_yaml @@ -0,0 +1,10 @@ +dataset_path: math_qa +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/mathqa_alt.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/mathqa_alt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d13679d1aa1b20da995e041471ed84cd3d5708c4 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/mathqa_alt.yaml @@ -0,0 +1,10 @@ +group: mathqa_alt_ov +task: + - mathqa_alt_ov_01 + - mathqa_alt_ov_02 + - mathqa_alt_ov_03 + - mathqa_alt_ov_04 + - mathqa_alt_ov_05 + - mathqa_alt_ov_06 + - mathqa_alt_ov_07 + - mathqa_alt_ov_08 diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca38bfd770eab0a885b16c0585bafa02bc57a923 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_01 +task: mathqa_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a diff --git 
a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed2eae14816ce0d9fb3ce127a7f23500b223dbcc --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_01 +task: mathqa_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35a9d0417eaa45d9ba8aab2f044b7057d5b6fdc3 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_01 +task: mathqa_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6a259f29d3899d5e34c6d6474c57686e6000f20 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_02 +task: mathqa_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31ec5c1bc394b8235e680fd2c7b5cfc3e17d06f6 --- /dev/null +++ 
b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_02 +task: mathqa_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a5e1dfaf7e06d2fe5c4ef9385f1ad34e9ef20f8 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/c.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_02 +task: mathqa_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22c1466c0438ef02c95d5248724a4a6d4481340a --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_03 +task: mathqa_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83ae0a0eec8b0b9fc9953e09e4f9d4353b246634 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_03 +task: mathqa_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b diff --git 
a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9bb436f121076540608c6a0353eed59b49cc496 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_03 +task: mathqa_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa9f1ee74978b5ad7d8568cc8b15c049d038a9d7 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_04 +task: mathqa_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b68fa2b459bd47d85c441b6cc91873ac4f42364b --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_04 +task: mathqa_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7f8eb2020c9e5a74ede4a5b32b6cc000a1c6737 --- /dev/null +++ 
b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_04 +task: mathqa_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8431baa5044de0493d5fa4edabcfd5bd0ba01e21 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_05 +task: mathqa_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b13d647918e4dd04988e38d0e0a3eff9f8d11ed0 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_05 +task: mathqa_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb23a69d3ce4acdd79ff82d2519de37bb4c1cd1b --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_05 +task: mathqa_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c diff --git 
a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99e8c27aee248e866878158d43b681a038103718 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_06 +task: mathqa_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77fd1d6214dc3c6c805f4dc2caf54169f83ad661 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_06 +task: mathqa_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af60a24e34c3c32edfb76a2d6f7d6d4d0720c385 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_06 +task: mathqa_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00a8115505a7bf08eeb0d2f19ec458a9bb09fff2 --- /dev/null +++ 
b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/a.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_07 +task: mathqa_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f9e1b2af1f7cc8c08ad6548dbfab9b57a788083 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_07 +task: mathqa_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b40d282fb99f0a94d44ced2bc6affe288e5c0b2 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_07 +task: mathqa_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2bb37886480229da14eb1b1ab01939662801beb --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_08 +task: mathqa_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a diff --git 
a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b3b2a674e04167911b46b32b9814e4878493fdf --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_08 +task: mathqa_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2242027de083b7119eaea4f92df79bf489ca8192 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/c.yaml @@ -0,0 +1,5 @@ +include: ../_mathqa_alt_yaml +group: mathqa_alt_ov_08 +task: mathqa_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..09bc91f373ac1d2e93ff10459722460075c05719 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/styles.py @@ -0,0 +1,86 @@ +import re +import string +from functools import partial + + +def parse_choices(doc): + choices = [ + c[4:].rstrip(" ,") + for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", doc["options"]) + ] + return choices + + +def doc_to_text_base(alphabet, style, doc): + choices = parse_choices(doc) + num = len(choices) + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + doc_to_text = "\n".join( + ["Question: " + doc["Problem"]] + + 
[choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + return doc_to_text + + +# Full continuation +def choice_A(doc): + return parse_choices(doc) + + +# Letters only +def choice_B(alphabet, style, doc): + choices = parse_choices(doc) + num = len(choices) + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + choices = parse_choices(doc) + num = len(choices) + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, 
string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c91f8e3ee15328f305ae5b5dc26e8ba06b88ad9 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,16 @@ +group: mathqa_alt_pv +task: mathqa_alt_pv_01 +dataset_path: math_qa +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "{{Problem}}" +doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" +doc_to_choice: !function ../../utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: "{{Problem}}" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16183cc3cd4891bf57064ea59b400316e2b59125 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,16 @@ +group: mathqa_alt_pv +task: mathqa_alt_pv_02 +dataset_path: math_qa +output_type: multiple_choice +training_split: train +validation_split: validation 
+test_split: test +doc_to_text: "Q: {{Problem}}\nA:" +doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" +doc_to_choice: !function ../../utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: "Q: {{Problem}}\nA:" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c757e6d2f5c150b9b2979b28d55c805010170b24 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,16 @@ +group: mathqa_alt_pv +task: mathqa_alt_pv_03 +dataset_path: math_qa +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{Problem}}\nAnswer:" +doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" +doc_to_choice: !function ../../utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: "Question: {{Problem}}\nAnswer:" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28c5caa940591a34c2855e02049a274c2b62ef46 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml @@ -0,0 +1,10 @@ +group: mmlu_alt_ov +task: + - mmlu_alt_ov_01 + - mmlu_alt_ov_02 + - mmlu_alt_ov_03 + - mmlu_alt_ov_04 + - mmlu_alt_ov_05 + - mmlu_alt_ov_06 + - mmlu_alt_ov_07 + - mmlu_alt_ov_08 diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d210cd1451f32aa232e2e06d8fb51d7add43c189 --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_pv +task: + - mmlu_alt_pv_01 + - mmlu_alt_pv_02 + - mmlu_alt_pv_03 diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/_mmlu_ov_01.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/_mmlu_ov_01.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80dc28669e9c741cad8b6e6b3e97f51d58f0101c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/_mmlu_ov_01.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_01 +task: + - mmlu_alt_ov_01a + - mmlu_alt_ov_01b + - mmlu_alt_ov_01c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2839307e913a1874ceead31f98cad00fced5d204 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_01a +task: + - mmlu_alt_ov_01a_stem + - mmlu_alt_ov_01a_other + - mmlu_alt_ov_01a_social_sciences + - mmlu_alt_ov_01a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffd6d61753bfa5d464a9a8098ed9121e3e397eb5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_01 +doc_to_choice: !function ../../../styles.choice_01a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b6d983dfab869cb45f9e32dfb9dddc461e2a665 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8179c42e5c7110cd6a922f7794d16e7be8cbd02c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d81a6206145a8d79c5d66120ce6b37a38c8b6950 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_astronomy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b435f2d16ae795c5f48c00b7364ae5075cce9c40 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..397f1a22cd77f0990378008b095cbbc4e12d903f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b56ece83f5afc07904014b86da372647298b0e46 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_01a_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..200619e1a4857c8d814e4970b4ce5d75876332e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3c603c5ab1e8984ee7cde5b84fe9b894be20542 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ee8bff060f80c5f02918e72e6f2243f90d79eae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4edd910efdc79021e75d5ec0016946178bd7dcc9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f57ec94da4aaa946de6de931769462efc384120 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1af0a156359b9b2162a9ed5c1a4f2cc09dff1888 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b1351957e2b13fe30b8de23fb9c8596f2fc0f54 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ebd1493af8a43eeed96b771422ef96c5eb993ff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_electrical_engineering.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1aca0f3e43b9c2fa2d5d725dcb2302ff9cce609c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..027d0eb46d0e8e3977253a06f09bdccb2a8e54da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec8595a61a12a0be43988fdc598d835d9ad05b03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_global_facts.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd4cbb421afe3f7da56b4b16274189b740b4808d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23a6c36093a47081e10ce2b3bb6c65a2c8942abc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64224c37c0161a6a136c22ba823a2483836ab90b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_chemistry" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a5766eab667b0f0d5c469528cba68104ced6094 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48d73dbd0fc71bcaf5d6fc23ff59bade8756406c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bcce14e7404bd49108acf730d3952fab2869d538 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be05b188bf5f5bc49f49eadb344e4784630e7698 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1e46832297552fd5b3ebc3740021e446dbd50c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..195266173ab029ff830b269738b4d6c36e21206a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d6e123600445aeebf37053cc2738db7cc68d5f9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..258b3a9909caa49a6783c345f2ecfc1913a7f67f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": 
"mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..975a9128b3b86c7f524f5cb140eeb1ee6f18fadb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47e394d07ba617e0389c098502dd26bc39fd3dee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..034e88e4a457a5e230bbaff322bdd377bc51c0aa --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67aa7e65177ddf33871d845542598f3a02509778 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d165939c6e6817562c0f136dad275f4a6cc59fb5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_sexuality.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f6db03774c11a79b819ca1f75c14dc045130a5c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96c4c5bb134e4bd982910971d854e8f171f07be3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a64fd5f88aacf1e90a3e74b99e5ba97f1e7ae75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_jurisprudence" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c56f545c4858d6bda12cec2446ef869930a7f6bc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5624ee9ca044eb1dbd1b827933d715c549d90bf5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23cd77ca6046d5c209f01602aee2367b4763db2b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_01a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f16d837c5cbd3c33b2c1d6e667d5c63a70f041c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36b8beebaa184819b06a9a60ddfbe3af51bb4799 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14d357043bb4e40fc570898101318ecc0326dfc4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": 
"mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ffc917790a99309ffd919237d26beef1c270cda --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08de040cd60cd272622cbbdce8cc37b4000e6683 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3729066d2b494d8941c782a34b4446b84a485f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) 
about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..446ae69e17bfbce1ebfe124c59a0a36006e2cc46 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e918b8bc80237c9cbf5d1a58e58b2807342c749d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d88699a40937216898b044b10e80d81a8a9ac41c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple 
choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4720ea2dff183cb3ae06bd2388184474b2d2760e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94c23c292bc7f0bcf4e347b4b422240f05624665 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0eba14459b46262a26140088dac83b809fb5b764 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..638362e420883b7a7a908399b6b15992e0301d2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d14b4ef4ffe4f613e2a3c862f733878bf00495f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_sociology.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..41969f6d6cbfc0d27225ec41c809e81448d76a99 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02380c19b0034026689b86418e18d244530da288 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f077a31ac970e0dc0e84afbfe05855f281456b9d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c043a27892d841ccab222f189b93967a4c8bf867 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98de9bfecd6023add9eae8df0bb5c415046f6453 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_01b +task: + - mmlu_alt_ov_01b_stem + - mmlu_alt_ov_01b_other + - mmlu_alt_ov_01b_social_sciences + - mmlu_alt_ov_01b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..3fc9f6401624f0bfabe588d08315a9e20fb97b2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_01 +doc_to_choice: !function ../../../styles.choice_01b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_abstract_algebra.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c729cd5d99c605ccefe00a3353f1f2b1ddaa5eb9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f0fdcc02aa9ba523c0ec71d3927b8d9171df07b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52479b698a4ca793bb477748dffc3d536b2f6311 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_business_ethics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6b2529c60014cb6112da217703432e3ec1720ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48d18e21dc673b127b79bee49409a041651bc15d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67be6bdea2125f9063f85e93b15e5188acf7ab40 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5a2b281bf1ac7cfcb685d2e1ca99f7acd80db0f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e27b86f05d6d4878795d27b72ae3f0d999dece0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..adc7363ea3b5ac8a2615f20eab478749fda32d29 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about 
college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10c82eca43ca746afa63476750e39a33502d33e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af4b7ac17d86405e1118ca0ac92a47b9e86ef05c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e261a81febb9d7f7d397f6510cccf861a273142a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" 
+"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b18aff6b7eb4b11bd76fa4183660e022fd663dfa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b27fcbc6c2c25ed84e81d27bd87e21da9d89985 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b6b77dbaf4d2502db7eed83eed6b9759e336046 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b388c14299902bccbedb2e2081481b4a30763e24 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..380b2c72565e3f87024893a62e7f4b2766d7c3d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_global_facts.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..666586e6ec510b2cd7575473e766d0c4130ebdef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c3c0284bb19cd15ba88e374f2a72875e18daffa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c45655492fb9750a5ea3c4e72f10d981db2b392 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_chemistry" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce932c8874f0fbe1a8e6caa358529b30aa51fe05 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e74ac6fb5467f03a64484e594a53e7b346eb2bba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dad2dad4a23b019381ca2be46b8e14d8900086a5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e646650913c2bc92e446fef3adba906780687e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b64889871fdf13c4072b3a96da94d3ad6d9876d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d396ff6876fe97aa0e8981ed3a363e17e6dc29f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed595897ca74f0baa6d69f729f0542315bc966be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e15d293e18c1d663a6c9c1f5df24262ee02d6f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": 
"mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2ad6c7fc760cfc0f3780e5ba522179468d5ee0d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9e7e8e5b5edd6c2ab31c1ee8d2f8121c9257572 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb383e715741b22a82e56a1e74e3e8897655c4c9 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5942ac06ac16faab01f97819a23ce6744878b51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34b171b19d2a7e11e88feed4b69e46b44708017c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_sexuality.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..890dc96acccb2c6de66dd9d40a3d73e6c7e8cda7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..915a2e9f0fb2403a7cbc04cbe9f61c9127edd9c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61824c8f475e0a3d23286745f558c19c54d23b00 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_jurisprudence" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..393e1e74c36b04aa92ee82fc0ed44e3a24c65be1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12bd3d3680507f6b5c388be54591770dd628118d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36c9e6cce0350796a3c3a6175cd75982e609f5a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_01b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8578bb86296982129d58515b9f3d40a6f601ba0a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55304f5b1241bcbf700ca9ac8358f990de2b0ef9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fb4b8dd59749b869080e13d018be27e2609566d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": 
"mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bf805a784025ea2890f3fb261c440b2940359b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..322a98d53b3935362a3884aac1a2c7028a85ab11 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f3553b32acce4d4241061cfbbfa538273fa5b6f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) 
about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..badf51ea355879e0459ce48880a4379aed078435 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eec54ea0f4c9679069743110e2a84cf10a74014d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bed584f3512d4574a8e73abc14d3de0dff1c6ac6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple 
choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..574f13ff97a4d73ee33c06770b85f517eabff108 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3913e4b13cc443780e5b9596c88e09b6bbd69ce5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb612a970e1d10fbc4febca70993ff6727e27764 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36e179e8fbd0ced00f71a2dbcddc4c10d0cdab06 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c907ec26af6b44b4b0d2bdaa7ad366b8b8a0f8ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_sociology.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..0045baee155125552e5ee26103757c5342c7911f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5fa5b959ef4b24aabf97429019a3b9c07dbe13c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f045210ba528e9fc5309e74e3c41ecabebbb4400 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d252c7307987fdb9f63bac8d22204c4eb84a7ca3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3a06f3136b4f6102cf7a38749e46bcece50fe92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_01c +task: + - mmlu_alt_ov_01c_stem + - mmlu_alt_ov_01c_other + - mmlu_alt_ov_01c_social_sciences + - mmlu_alt_ov_01c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa68959a9283de7f0ac8a1bcff2249f0684ac109 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_01 +doc_to_choice: !function ../../../styles.choice_01c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_abstract_algebra.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f39778db20f8f1892a472c89bf333ae0479b29ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3df09f512feedd6bd6bf425c870ad2fe968bb85 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ab730aa35ad8b5c6c8d3db6a9a9c0bacd1245b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_business_ethics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aaf6bce42591b3678c5855fb1665faeacacda0dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..26c029b9003878060e885b4df0e031360f73eb84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7ed41b30ce58346dcf8872c88e78c6542380073 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3c11799f6fdf3b2ecc47335885c42e2f5ef65e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65b4db3491b2385b432cdcbc2cfe30024bf5c2bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eaa9f4272eb6b2a78b2ebc1b70ddfff2f8759887 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about 
college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e4c68770ec37bf9beb273d044f64ae3d2db75f8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f30cc94e4e6244f01dc9749b50021215aa9e41c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ba1c770a8386158d4dfcbcea4e4810dfee434a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" 
+"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a0fac6de291b6fae78cddadaac0363d9cae37ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16953f8702c89d6956c1293bf0ce96354f726a0a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a24deb272c6847d9ba8ecedef9ad9cb92c8de820 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78f00f577e1e3be5fac8f8e005cc7c5a22793153 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b96a07804c30caa38607584407c45c9597b62284 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_global_facts.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..06c6be194672c6b2403c8fa7106debcf5cf4097f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a54c7e569a0a4d78f7660a003949f575b8880c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e30a672f21286b09632dfe9dde04712deef5c51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_chemistry" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2af74be59b0db94f846c301083916ef77f267ad8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..268121e5239153d2cb87a04ed9e977bf1c8341ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..120e737f2cb7ebeb35a52c6e4e1dd449b12c91cd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db0fa59d34dc3ae682387c8e755a7c0b1a8d62d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..979ff8b75aee2ba5129d74522c2c2737d17476e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..186799f036a6774984f2bdc77f63e0c1b5991a78 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b6fcc67c71145d6ea8a20d3ccd71679ac146479 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d733408b9b95c9f52aa289f20a7754704477baf3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": 
"mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0005c6cdee7f883116ee0ce65b15536ca0a2e9a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..262a2ae41532286817e32bc932ee4c8f0c1e4290 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53d3bb67e2220c71507d3e7e528930d2c1a52e7b --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccf074ae4a4e3379fa2b822fac18055a1b79bbbe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5365955d537122ba7f12e8c3dea6af94c5d67549 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_sexuality.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb2c8df3995db90c487fc6e1c0c8364b6531b852 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8d94795d3d9b2f749197d5865b3f4b2976cfd67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2eebff25ff5a92525a840849beadf709def8f49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_jurisprudence" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20e3d93f867b3a0827d875dc1a97993280055037 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2af3d02a584b37d39e7931ed85ff2b8f6fcbb67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44d477cb34f0817751b533ad3893aeadf561b9cd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_01c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..424f941ee6be20e13dce783bdac4ee5b9a652f34 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..540aba6b9c325d6517794f79a24703096926db7f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d76c911601c11c8edd80de99727b74960e395f69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": 
"mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d36748206e5563c9b8ab6ac55090e77535a17a93 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eeb089c88e4a27f1ddd1cc7488f5a389b1f341ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed8cd5bb3c915cda6cfd28e8dc104d1565d6c951 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) 
about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c40e4600f76b17811166d03154313b930ad2b5f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e48d0a50ef33a7a6e0d3ec4d53d785af440fe9e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89660aae20e07672bba9a73c8036772f2231f66f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple 
choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fa857943dd7414a498a54ba599b540a64abab1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a59797c18df515182f99bc7f6861c462c32397f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7270adb4eccfab6a00cf58e13a1c74d88c89bee --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e830d81f3901c9a7dffc4148dedc497542d2794 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13d8a3ec08d3524679db616ee94c8aa71d0dba38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_sociology.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..202597534863bb83386dddd27c25e736e0fd4011 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef314a4f08e167968dd070d332b87482e7d6fb06 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f4d3c87faabda9de3a8fab69736c30019333e0a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5e54260db29879e428dab4b104195fe42404092 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/_mmlu_ov_02.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/_mmlu_ov_02.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7bb3acc0944a562d66c0dc79dde24e8e354fb81 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/_mmlu_ov_02.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_02 +task: + - mmlu_alt_ov_02a + - mmlu_alt_ov_02b + - mmlu_alt_ov_02c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb86cab1d721386277df31381adbfe393eeccfeb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_02a +task: + - mmlu_alt_ov_02a_stem + - mmlu_alt_ov_02a_other + - mmlu_alt_ov_02a_social_sciences + - mmlu_alt_ov_02a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9d0cbe2a3dff9a6ed27c4ffca1a2dbc6d1b1172 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_02 +doc_to_choice: !function ../../../styles.choice_02a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48affab83e0ff32ded2b5e8da6c30fc187547206 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22c431f2b43a7f28e5128c92ba69d962b0c75e57 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_astronomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..9d2c6ef8ad88f72163df196a889124917ecc7529 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..909cacad66ef33af182c5aaf77c327cfbf2e9247 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f954bef2875ac6ec0ba9529acc973a66d1da5ed4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_biology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7f3b503b9f82b507fab1727a8f323468759cac9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43f59c4086cd0c59ef33ad0956b79dad40acd693 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e9e50b4cd0a3fe1d77d88048b0f7d7b1803cdcc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fa13ec23465517a390813885033a289131bb660 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dd5e59d41c869201988d8a60abb8101eff9c762 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5d480261410b43ec6710989cf9b03183b436151 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about 
college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c51793c23baab9790fce163ff42fd35772e8431e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ae991c77a4343db6595ef7609300fae0874c468 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c2453b3adb9e406fea35a68ef3704bd2ae4f3be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" 
+"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f84ea46bafd058c80836deb6da2db2004cadc067 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70650738c9ac1d8a8eb77bdb824ebe26db9acdf9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58bed110417e96249ef6e238de727020ee4a75c0 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67c2bccb3b9932e8b288c0903a834841e496ffb4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..606cf7cb03bf1103331baea5cfaf2c85aa6c2c48 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a263a8db9100b65debe940e2a9d5cdab1decf48e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aaf11f19d5c3e964b19f383f2756fdea03273ecd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16d3c792b37e2dd344a84b9acfa62f01cc695138 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2890a9cab51ce72ee6bb778a9dbbf6f1a820510f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f9ea1ffb1e1cd536b4a8db9ac5b938a3bee4aff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f9d9bc0494e491c1ce2fd39897877a0a22867ab --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..95849a8a376078fc1225f739a587cd895e4bfc98 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a47d2b37b5e37966f0a4632dbe9e9bdc5f4ae41 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_microeconomics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4bc7e853770728f6bfaef028c1922774aa37b00 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a68cad24b182042cbdc60d40d4440928f3a11383 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc69fe7e1499b751ef0e6dc91fe6c6783b166340 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions 
(with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2d93fd22c889c8ebf06032c409a87d149987b19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09cca52d0917b70fe4f594137acf759bd45d4744 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e13b1cc2d1fcae3df4f582d2cbcb8a170b0865a --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e41cde9fc9754e0bd0a5f7588cd116eabaa3c752 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7329d83ca234ec198e92a591fc4a8037279fb16 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_jurisprudence.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..83e9ba831f6e388f0d0075a330e5e4f9eb03b5c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19f7be70887026a001eb66f7d474745fddbdeca6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd9c96ee8f3219dac065b5d2e50e4725d5774ae7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_management.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f57c2349744e9d5bd17b01f8835d723785d775b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96eae4ad6316af1cdb6278e70329227967e60c22 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be684a6779f0cc505d5dc85f6669e97a35c77b3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_medical_genetics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1987aa8f9fb6204b3687d6e4d7dccf6eff3c7e19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e0a677bd224ee696ec0e8a45e4b5f610b9e24866 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c0bfcf855da9aa9ba868a4751a9256e8581d8ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_02a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a9a9b8f4ef199b26d188aac9364605e5e42fbd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f8a63e418f8dac86c46ad4e5181d2c570dbffa0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d0e79d2dbeacf4102206d0fb86e27e72433b92d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c32eb1f056e140462dbf85f04ca35d48dba1bb62 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..670486a0ed56f5fc85dee041cf3d4769e323cf18 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2d8c0e178a01b32de70ad44b619ef75ba1dc439 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are 
multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4539eb838fc4edb9357918ab99ec4d8b15dcafeb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a91416abc8b8122c21496af6314fe59a94d2389 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4563db6759fcb1aa1c0a475d12a3e8729187f4c8 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15a1c3cc20d8a6561e4d45fa19c42ff28e39de7d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78b0a872e52af17ae3d68d7f495b58e40b3a9d95 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_virology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..5f2a94a9241ca73f1b9f8f39aa8bf2733e1093f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43680f9d0ee80abec3f7886d87125b57b2f5b56d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e44e24d8599a7a66a366d325bf65ee657b0a958d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_02b +task: + - mmlu_alt_ov_02b_stem + - mmlu_alt_ov_02b_other + - mmlu_alt_ov_02b_social_sciences + - mmlu_alt_ov_02b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..61d42dc10941b1442eeae5ce04d02eda63f7fe3e --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_02 +doc_to_choice: !function ../../../styles.choice_02b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..beb0b444beaafd17214eaa4417bac6e813f00ba6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c191de4d02b6ab20be880ed7d1af6fca15873732 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_astronomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..dead2a7223b3b57940f65e135378e10e89a1c118 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7606d9de662f9328f58a04e4893dd70ab5920b05 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..590cc13119c039fa9d48f64c11db95e87e9fbb76 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_biology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70fb2a00ea8d54a7f5e2ea4a6c02847d116a09a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2507d3cc9b22362153a171df9b54ec99b7b218c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa2d313557cfdb7d750ad83ffd592d3bf6747d19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3b3df6de915c0cbf32f679099ed2192e23a5c00 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0c13556bd8396c8f8c35a5632f4e9b7783650a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6091f9faab39fbce0ab1dd74ece7589ebf888ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about 
college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cce995379b9bb1a7aee6305a79535f264dbe2344 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd07cd4dc8ff148b8171bc56b56518d02c476d67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55fdada85f55c179d183ca5496c1a8f7f9a9550e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" 
+"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23b5eca775b189f4423b22049484771cb27add68 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2968ce239ae772f6e08c24442998fd064bc9bd37 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb5607a858ac059cfe5807914bb84e58a7909425 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23d91ba623ec73c732b22a38ff2cbd6d2c9a2209 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a466aeeb34f2089fa09e2c4439d88c6370f2ce0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ca1deecd2a130d6921d58cf41228c46c9641ac0a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ac732305242bafb29a1bdcbef3c5f60b99530be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16e61fccc7fa8c120d70e1807229e69595bed02b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..edc8d86addd7e21d45a48dd355a9b9b464fefbe5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ee9b96fde01e7a60e80fc243ac0e5b252dc52e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b60e7ffde989b71831e60f0b4815faa0cd6f4dc3 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..232fd0ddd7693248bfafe0f7f579d69efa5aed19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2aff08168b1c00149669976995c93b5b641b9cfe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_microeconomics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..279ff330a6665a5293476eab4f7699b2d5a4126b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1045aae01eeb48d70702c51936f9e5382dff3311 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d1aa0bd812d13d864cf2ff0e0c8cc1dc7d81c9a9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions 
(with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd244d7069c47174b09b929a4e8d558ec97cc2f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aef60150e4043f7cd101d8e5b87d82fe0a4e92d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df776001c6fe49260c38c1a33e876f0b8f1fce4d --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aff4c80ac411b6b4463080585137fae01814a5f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86af3288caa58a7569b74f63df0729d95b7be1ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_jurisprudence.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..934f6dc20d2866f17fabaf396d85d4ba090762fe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..650cdb8eca4c0eee11a4c7decc1caeacc4e3bc91 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab6e4b89b87808ccca457643a72fbfe15d1b7983 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_management.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5172803afa5e3d72918354a1c80406d614b36c1e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6a29fc1afd73ee58b6554fcda56a3b40a508d03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f71960999c7e5638535089ef845b1c6d040a453 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_medical_genetics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74111ccf635cc42d7ae501c602cb74d8d545b89b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ab4c6634a29149eb8323db5c7a2745483b2915d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3b1e389e222aeb9dbb9c38bdbeef39dea9d0ab5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_02b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a315c4a9998d798cd7a298c85c41ddfb6fee83d1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..613c9029c5cbcfaf4d12ed0577a69f18b7f0b7a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c6498ec4350341423ca6818ab6f05d3815a1a4e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d85788b3f9ec0af7c9442f35fa34d11b310b287 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e73bdb49128a392fc516beceb3a267577fd2f2b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3a837611080afd0052319e4d3c6a6d82abc4419 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are 
multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a64b92e3e5cfaad3d645427a31073808374b3353 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11c8dafe372f33f2001291d51c5514a653760cff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cbe15e92c94d588b5ef15337f20c79c01a2af562 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d368d6955533fc18f2605ef74e576de2b92e6d6d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8967a97ed2ab637adf720034b5ee4147b894c7a4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_virology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..8e257c1ed05bdb426176ec142c7bf2ccbeb115d0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c2529c12e94daaad84467359c5a3282158894f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b203b6aff94d87359ebc509b91ea9bb7da6951c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_02c +task: + - mmlu_alt_ov_02c_stem + - mmlu_alt_ov_02c_other + - mmlu_alt_ov_02c_social_sciences + - mmlu_alt_ov_02c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..15784a859004fff8fd7400e50d626d6ebc90a6de --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_02 +doc_to_choice: !function ../../../styles.choice_02c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2995969b2a7260569fe4ec650cd348b1c13f5f20 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79df68824bb034b31b74e824361680b1fa1ecebf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_astronomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f637b8f02b9f170ff7aeb3103a9b4f554ab795b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5719fe573fc10d2622cd95e94cbfbb65643d262 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69a546df2974a994b52ea8372f9ebb0bb6153c31 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_biology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9d4f001d951d5ea19926254027151ed3cea50e0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66b92c4839095ab49cb9455a8eb9d909e2df963b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fab1db74df986023b402afd0ec954f353498f500 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0448b27ee6a99bc77a97e0705cf6c8fc9d374601 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a760b2230067e1e82d981c24c1dc3d0ddb2b4753 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59008d608c3fdaf20608961f8894a40cee5c138a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about 
college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0f725bbc5a037fb406c4dc03359e1504045f75b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a05daea8bdc6e78c8a810578cb75e5c97581cb4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d60b39449758bb03ce209e8ec9ae9f3a90110f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" 
+"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4616d29fe79d9583d4c8d629869bf8a789e7bf12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1441a0eaf491bdff0d821230316e8ba5eefa6d1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0593170aa6a479e441d42a2c8f5d7e888074df81 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e59ff221143895c023385b0e92547730e5fec8be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..733994e81df5263cee66a0e65196fdeeabd0df94 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..583722f4d78a62bda052ed17ca71c6c127e047a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9480cef183b7df04719cc76d51b7545274483104 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e34274a9db903fcfe453349c174382c6e9700a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db513bef6c648c9e54238047896aa78928a9515e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba41acc3db10be1a75bc0a96c7b7b6d8fd059c15 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..155d67d1b18c91022f521313a94467bb11978271 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1dfd8e7097f16f8cbca7f8109445a2449533d401 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5da992aea5b87552ace929721a87a8838d5f41b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_microeconomics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f7f6e4707ebb5f708d038dfdfefa928d948778d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6d7fc2f383ad667a9a0408de0c33aa1b5f1eeaf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5855301446099347d908017400785c0079901cca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions 
(with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74d97255a4ce1782f76fbf4d7b4d7f7c0d882c89 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2a8567329f0984dac25494cc47a7c35d3636641 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e44b95c4923cd20cbae28522c9eff74ebfc110b7 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dca8f5d630b48b07eb8f7150ae588bca97751977 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b6e4a88a66f1a7d4c4a512265ae88bf5b9e39c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_jurisprudence.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..6f785e0eadc316e0bca039edd68d479dd05859b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d610efbcd7690803efd5ec1df1fe504da924ec15 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..791ec4f39336897fe78ecced21693f97e788fb7a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_management.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a49c5299274f11eff614173522eb108e529564d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c735a6bae6a9d9ada2e54415757c7e4b16305bb7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8b331eb47bbf62a48068cd2279aaff96cd7c311 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_medical_genetics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9de856e1e0caf29be919fb6841d644fea54a8613 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8770f2ac67cb828314e796251fb7e7c76944d47 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b52874799594d70900e4936dab8e83ddbe898d7a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_02c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db79e050f71d510e14191b789e635b471c0c385b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..939e94b5b937a617e825f8f03b0fb1c7ace27ae4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04432dae57e4b76b887cac9b20b146973180ed48 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ef6434bf56530d11528d90b1c0acf8461d0a0b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64f9c536967a393dc91775559d748de0a3a70b64 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..741176f51b33cb57adde4c99eb85e47ca0b08e1d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are 
multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc25a5ded9e716a14f55208b627b14a60fe66a7b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4148aea3353f3674ddbbffb4b56acff17de676b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff3d0a17db8ab396df863fb96b518ec0c1fd470c --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ee886b7ed19565736fd3f7b2b7c438038280683 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35747cfcea727fb751c876eb5c8aa452ac42b3ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_virology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..cbb163b3985799834fdaaa3d34f05a75a5cee090 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f6ebcde23312a8bb494cb1d873d22a4e80d0cbb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/_mmlu_ov_03.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/_mmlu_ov_03.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92c61b2f106120e561982fc91b36c5f6e1e1c870 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/_mmlu_ov_03.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_03 +task: + - mmlu_alt_ov_03a + - mmlu_alt_ov_03b + - mmlu_alt_ov_03c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aec0d59977e0c3cfff1fcbafa073407ecd60a2dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_mmlu.yaml @@ 
-0,0 +1,6 @@ +group: mmlu_alt_ov_03a +task: + - mmlu_alt_ov_03a_stem + - mmlu_alt_ov_03a_other + - mmlu_alt_ov_03a_social_sciences + - mmlu_alt_ov_03a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..dff6a860d60e6e415162d320db870bb0fab42ab2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_03 +doc_to_choice: !function ../../../styles.choice_03a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2dc68f416836b3882b84c8fb155e188639a6690e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3135aeba310ea0f1b7a0491e97e58b85bfcc8c4e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35db0fd5baed2cb6bc9bcbb7bd3a9d132d8339ec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca43852c29f5b029d217e5795a55959a0dda0f7a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee884e242105b9239a1b761d2b50133db68404f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ae31d429b06963be02fc5a267881f26384e987f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd30fd0834aa135daae443eea7368205e40c3b03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd74f254ebc940b8ef1c95609dd0b5334ac53116 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e45be70d82fae1e650a1bc02e0215877a0d0bb9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..391a684963aee916a19f680ded904981c8917e53 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_physics.yaml 
new file mode 100644 index 0000000000000000000000000000000000000000..308cbfc77399d665a469bd06c8264a5154bda7fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd155bea6bb10764a501d6781f4f2e6728199bc4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ff45f92674d550a5738a714ba87e6b63936e235 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a1b2b2620352d78efe66a62f0e77525621a751e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7c53393a9a5f0179005fa9503c8e56d4b61b48d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3e8eb0a04a6751a7cd3e653cc42ed9d678e5bbc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_03a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7819c3889f83087c72c475176696d432df74f1f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8aadcf3d3fe4a679e4acf1ade08cfcc19de85c49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4a6284f2fbf28d49e53c77de7fb5b509e2cea38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": 
"mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8709ea4ff42d1e638a575774929ea4506a5fb2dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5323a1b47dc4109ff1f1b95fc48d5785a6562b60 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dd43ab58acd03febe32182d47fa0d2b701e2b97 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42929928f2c971fd397ab9411877e0b6da400fcd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67d153e05fcf2af48197c348c4b0c48e9f48cb15 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_government_and_politics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0dc557f6ff21001f8502f7844559613df59ac0f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..001a11f4795f8ae0e0ade5f0bb97b54f7237cd7e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eab63c5dbf51adffb50c84f5980089564fe3eeb8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5422c4b1112eaf869c36b8239b3a5e44425c601 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80df8ad21ce98e6869e294a36c9125451c281a02 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_statistics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4ef4b26009c64dfe83f653c19a8f3b9c2eec8472 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc86991a870a168e3855583bf6bac851744739af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ffa6b0dc2e674b49ef8b09b8a31caa7eb9c0154 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_world_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f0c5e199117fd250acda9ebde225e53205a0108 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9cc8675b982ee1b34a1d1bfcb234fcae0b8780d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2baeb1780dc5006e5174e443ef4d62c375d4ff0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_03a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17bd91956adae264ed37dc4d122a2a9227578635 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c9b4c3bf776204d498f56ad9e29b533683df5db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9e52901821637fed0e0bd0c0da908c38209deee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) 
about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1d161c8049f878ae3003a8af88defb139b38edc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6477216d50d4663ec57b2bea30399e46a054d937 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc1a91c1ccd09c14fe2c09b9efb17ae3ae8d4228 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) 
about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed6b320de6b822d3650be86cdfb8adb82579f906 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2aeed4fa8894652ef419ac6f185bbf28238d9a1e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..878b7dfe863901ba5a1d821702830ab8e204400b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The 
following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e0c4a2cb6173351dbd1ea86d4f5cd5dcc4460a55 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a8d7020aaf5a67018edafe4d80c0a3b68822d66 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b32bc19dfa22a1d4267306a0afcae8128c22009c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are 
multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ae9751307f9df24200c0d665081e50d0a0a3df3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..744ee0674c0b0c2c8900adbffba90a63dbb436a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc67308aee260b39f7fb52532d552a7a38fcd97f --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e0e5dad03b4654e8b104a39cfc0509e7ed0881e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e26d3724de92b3a0f98f439439fb12850e3b76b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_security_studies.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f2d39ad2e31a0ac7620062014339fde98a84e9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..808bc194fe19f6c4c2a60ba0c27434128e59f0b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5cd2a44d2e6abc11709e27be508ec18db32c5504 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_us_foreign_policy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..542cbecdf82780a38988fb9e4b607313f0102a68 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9383a90dacc7d58e3aa12abf7c96d81c4509b3a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97b9deff980bb7d3899e748138437c0a3d4d3e96 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_03b +task: + - mmlu_alt_ov_03b_stem + - mmlu_alt_ov_03b_other + - mmlu_alt_ov_03b_social_sciences + - mmlu_alt_ov_03b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_template_yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c2f674d69a5c849cef0d82be7a9ef61f089aacc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_03 +doc_to_choice: !function ../../../styles.choice_03b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8125f799cbc261567d4f26ef9e6b7750d1334d0e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49e6b44a327a4c1d2f6eef77d284dc7511bf5b07 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_anatomy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c08ddb38351c2faab88bfdbe2d5898fbb47197cf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e0c1a93abdd8aba8786f98f0effdf10248855cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a58597ad6c0f6305f70e118b485ced1062e1721 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_03b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fd861656e941ea0e83de9080c9b22c541ef0873 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3bae9bb7a2d3b8bd3ee5a73f4570375c18d67583 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb6c5b6e2f36162bd9fd9c5f4bf4f2aaf674fb32 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with 
answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a8ce24d7db5814854f656af53c3d81747c0286d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2586b9b4f91d7cd10a72468c70013c7714fb62f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8cd632d0fa33562ea9f1ba83751ca7f406128f79 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_physics.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85cf62034bd1266e4902d784011a6145a1f12674 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d5537456bad4ca97126475632c690b190eee6bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba8a62e5b8c9bc4e98994ffea6b495c8f71d3217 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00974ad8ca8be7e6b75ae6e22827fcf8d8ee57ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42bc502a2db7ca83290aa6dc976466f13c5d6925 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03478823de5853b09a1d737534cb2bfc6784f623 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77632fe897f13ba7f86304545eb1ea7c08ead3f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72c770c20261d2142411f982f0b3e535938c7d5a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a9dfcdf6ea70d00087fa58ff087caafe4b751ec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9fe7b8d83119688bebc1f49c2edf34778613f3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6273d5f46c9c05c086963843fcc70b93469c75f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbcd3c24a227d1e8b79a613ea4420bcae1db565f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4cdbf201a2e1b24660c1805a7f1280285f9a540 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b57f64fcc7b9e3887e689424f33e69ca24f08379 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fe0f2fc80e29d48a60393b63a70ac1c8b28cac6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30e29284d22590a906906eef1f26994ec860f26c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e587ce8e3e8b357e8fb420634cc7623544c4d04 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8da0422bf0d0297126e2e96f5c4f23c5b10ccbd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6286b33d6374649de67f5fafd5e96e00c89e384 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e4fa8368c65431b45c1dcd8f6b7726e76775d9a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da65a5bee59729fad15bf8a65e93969ad57d456e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79822f51d602556ad401a6a9641177228c731230 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c89a184d4e79e78fd9d68bab562de8cfdcf76a0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91558fa1f52eab695097493fca33dc5fd4127aca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2122f7e2658f40fb8023e97dc6018f26f7c557b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08315929d75b03aa22cc3c5bd72a51ba77a17e36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ef6877cc4a05bf95746d56959187a9b64f5cbb2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_03b_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83dc821d349fff93c0d3254aa48e3a25be4641f2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdb0b828e87722c653b6fbd8f290a46ea00111d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17de8f0f9f1cee92c0493f19be7ca2c61b1b7ddc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_03b_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6537662e90a838ae50ba519f3535135fac593477 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42e23d044fbfb8a6fe37d18102a05f095d5efbe9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f946143dc40c02a17a9ba2d4e311539db68f131 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b786e7d86e10f1af74b54a492d1d0dd1620b7c75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8d1e4db16ac03e33efd150e4536e6ba6ee42ba5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2710da187907315e15fb3572d41b63c8177e89a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65c7e0214e7fdafad17a94d38b72c4f766c77b2c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c44d7798ff2fec37760e80166dd523fe26a4f42 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f9227542e8442a3a932361de346ff57745b57b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69569a9c4b019508f0e82328eea7ee1bc00dd2d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..150151b2b186de635042376b2b3f6c1364d269e9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a73326a9c70e7c87a984a2ad8ed28629de8180c7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f736efa8a7e3a20a7b33c170bce22390b4ef058 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..277c20602eb69f3381689d0af3316fb8689f8cc4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ef400c3bb8a9a3d9054f42b9eb744c23a44e9b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86447db7897c8283dab4d84f484a2a8bc806b24a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..279057b1b5a78e4a6b7036736092981848de8e9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_03c +task: + - mmlu_alt_ov_03c_stem + - mmlu_alt_ov_03c_other + - mmlu_alt_ov_03c_social_sciences + - mmlu_alt_ov_03c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4f8725dc433dd57bf773390678b7edef0b2547f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_03 +doc_to_choice: !function ../../../styles.choice_03c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1efca54fd2a52759a55f453b41a74c2164471fe5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de405dc9925006ee2de14ec97f7cb6632641f147 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84beb05248f6064ff107ae3c9eaa5995e8c5ef2d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c89907147f029d6ea5a392f596d4879098a9fa7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1f239af94450df3da84bcef34a47e8eb70487e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa70ae44b2c87f5c77e6bbb80ddeb2848b913f8c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f055b583fbbacd91e254b8e4136a22244f18e24c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ab1f595f18047d8f76d5675464d3eb3b777556e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13f4ff91c1be8565863b2fbbdefc9262ce44daea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bf4c54c54464503fbbb6ef1f158bd161fd086cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7142e1ba82bec225bc22caf032289ca479e9eea1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f5702d5f6f6b9aaa7ee782996c722683a6845d0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..edc6abdaf5e06f9ab27171bf8dd5b44f4d66acf4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8764653517c1a33523d75421e1016fb1f3ac311e --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ded23bd8046d17e327323c72e13a21a98f469646 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b202d83f531cfebedd5abe38b88e9bafdb5036d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e45b33631a945de290b52708bc08b915786c60f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a94b03d0d3e7a663682d39310b8e6c1cf54b4674 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af162db5889152b0d0f675c8a6fddca7bdb00f81 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32fe1ff22b204373eeadd2276c8626406bb7ddd6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..527ca7798c09757357781558eb66df00475c5859 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2def5dd780ef151d10107b66775bb4982d14ffb1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c4a8ae12fd455c0fdb4a2d68a8387314b922492 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e7c87d8809082e7d889bc43f38232c50d98d15a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06f527b215a89609f2a2428c4c838f574c9e17dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..598e5050938da7851df3d04e8830bfb68f81b2b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..889c0704a903c4a385ded2cdbbf260a001443051 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e00f4b9cdc360af2b3264c9f1f4fddd386a1420d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8ec2348812a59deedca38d0a01813493e024fe7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a31f6b94aa93053d1d3027231c66fbb43fcebd6 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59dfeb485be2ec8274f6e493818cd2238eb3f592 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0219ce0c164793633fc91733c6118d34eac0930 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a702d7278aa078d208c7f7f1e3160ebdcdc6738 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eed5446143cb168ff90660b5b2cbf79add6db972 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2dd5de4314533da397f3fcfddfe1960a0d60f6a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21880b7dec3653a08ed48f039f72bf33bdf40826 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2544c2456d935a959c86a56bbd4d527b5e933366 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5292e41faddac45d86d43fb03559d79997ad457 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_03c_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3e62abd1fbdde4400991d04e9c71f3b2716f8c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3958944ac8090f9d2efd433dbe26837738d3f395 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d67df1c27cf60c60dda8f3172b6f1e712c1ba6ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_03c_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f084f1fa3532e54862e0737a2151501b874a2fc1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b48f334b602af56cedb0c3d83c47f8764ba22f8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab91e4babc5e7c5b0b0738f9c595f7e9a8d2226e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a238240db1cf7836aa98abeaada8befbb05a1ffb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed4fdb44024fbbc6c6e3260da3c159143de5b063 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83368c8ed8e3026877f766aa1674a6ef1952a39a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3187a9749f67097c4f9dcc65ee247d4f8c7868d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1604310314ae376e91100a196d42c30062758a2d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7071e39c5e71a82ce0b914c8995d5ff3abbe54ed --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0339394f9fc57b0cc7a4f7320741b282a79d0749 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e1a4f456135f481600e09e418ba26b05dcc3529 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..6141dbdf6a6ad8cef3e951672baa74f28ec3e2f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d748f3ea2684383733a7d3873d1d8213acb9fc5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..782cd71661e99ea16dda4413ef196e5d16c76fdc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0f91a7f88d83d85ebab7dd9c17e0297e3a55d85 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c3446b3e7cfcd289dddc434b60942ae3aaac5ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/_mmlu_ov_04.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/_mmlu_ov_04.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6dbe0e4e717159cb7c9759129006c7245645ee98 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/_mmlu_ov_04.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_04 +task: + - mmlu_alt_ov_04a + - mmlu_alt_ov_04b + - mmlu_alt_ov_04c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_mmlu.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4c72d1b6b84b2b845c2b71e77e9c3c55569b918b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_04a +task: + - mmlu_alt_ov_04a_stem + - mmlu_alt_ov_04a_other + - mmlu_alt_ov_04a_social_sciences + - mmlu_alt_ov_04a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..609cc7060e3a2178531fb5e16022e0631beee906 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_04 +doc_to_choice: !function ../../../styles.choice_04a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c89e19a7811e713dc07aaa1217c62fd511dd405 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_anatomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4a50e3839f2df9d64ef1c1a7620077a69fff4701 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aec8048b4ec0f14ba5abff0a7ba07ba199fe12ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2041d6412341fcd3cfae71bf63ed104602928dfc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..24564acb7567f97d49a262c8d7fcbdb5c0d05dd9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cd9273addcc71eb80f8bf5a510f3530b443eafe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fd97dfe68111ec8fc6b16abfca2d2d8fe8988b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ede1b3b5b35ffbd1bd32e4c0201132341b054c08 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eed15e0d3c4372bfc588484fa4039c0f16b4f8b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15ef735574b777f0123c317e0add0917dd3c4b89 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_04a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d04b49758079ed32421fbdf52af7a8d7e2ee3f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7699e907474fd047887297743b2c86ca23537c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b44faf2a611fe727babbca2aa4dccb6400a4bec3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + 
\ physics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cf241d7ca45b500423129e9bda0ad22a76f1ece --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..698f8bfee72dad7b8898bcf324543157fcdbeabf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be3811c0fe05ba0826c6a1e56d7d8852a6449cf3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e621f77cd498bc973430a28cdae3b35d73ef5c82 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c6816b622e64601b0ce3e0cfc4cbe532bab89c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2951f396772c11e58c37039f6e3c427c7831266b --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2bbee0a7392ded1b3dc99af44a8d8e82d93aec7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2404903f05538c4c254a563558ab2eec079bf1cd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_european_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81885bb7fda503b588efd8e0b5f51176d31f5be5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4362b2a5c27896f5ae4d12a80dc9afbcc24f0ef9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..caaf9bd4514eb47f7d4faace34578780443ea3c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions 
(with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51de1287e06530650a59a491d7b56bd8a08c2f27 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6355e89c719bd71ce238c0187cad661f80a983c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1fb676d285e233b05f9a10a95c58b77a33f2ed63 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2e3cf520cf1be73ae69e56f16c47187d4126e67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba37f2fb0fa427d2aacf439ca927081c56f1eb84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_psychology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82fa0d321a8c21773efc9153a557e7341172a96f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a99cda3ad4f54a60cf5729f1067d58c0ff5085e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd6972cbe39b4e69d47691145770ec026dbcdbf0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are 
multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4525a4866cff4d3f8a0a22b3e2f95c7b20a1cf32 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bc378dc24b20e5a6c92f4199d07d78f28dd5633 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b71af43a82156124599c59e05c155501f8d81849 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_international_law.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3e2eafdd82af67b730ff090f51dc4986fee49d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f6620857b4f6482931f4b0b10175204d55ea9d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7960b3fdc663fe24d5a86fc941de44bf77b7c9a6 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a3eebadd2dd147d145aaedc6bd2b16455371bd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff474e3783acb2aad8c0fa869130c854b6d02307 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d56452d6f18d774022a39825af9b9d3e41338069 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..756bf186205ebedb5e6e5e52576a3065e86e0ef5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a1d9f64d729dcd442c565835fcfa01cf8630087 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..7278f5f518dbd0955a09d8e7b9e07cd3665912e1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a87a851d4c333eeb0f5c04805b2fb57f3654fae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a25cd4650509a87e4f9185481536d6bf02f277c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_prehistory.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4d84bea477da18d0ec01ad1acadd652d5c8afde0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03ae20de260b1bfcaf33a50538577d837cfde638 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0eeddfe5cfacec7c69c02a25c2bec460f500a870 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6f8ee8250dcd0b434b5ef9781dde8f961ea0634 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5f3538fd091be3667d0285829b65ee56e86b6c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca501151a9c2176ef3c5f0c539bd1cf082468217 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_04a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4a9af054d91770eb02eeef40e0ab1755f023a84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab0388bf888f3ff28744a147afe90d652ae0fecd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6279fec25f8bc69ab88cd53fa51746deec14783c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ 
foreign policy.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aad84103fb09e5bb58315e12444eed1a659efd7f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ab8f6b6fc1c3027da2deb8351f0fd4ea431769d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f9887c1fc1ba8de3d282b05917153d1ac6fc9a9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_04b +task: + - mmlu_alt_ov_04b_stem + - mmlu_alt_ov_04b_other + - mmlu_alt_ov_04b_social_sciences + - 
mmlu_alt_ov_04b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d1810d1ff26cc806b9bd3ba8f9720ddcf261d2e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_04 +doc_to_choice: !function ../../../styles.choice_04b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42d301b8afff1c8ac4c0e1d54445cd1c04ab7c78 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c25ee1127b2f8d58c891043417e569e712c3fa1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": 
"mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..577d16bba7782e3a535e5458f40f8796fe07eecc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf58d92944306159908ecd3bfdeb308caab00bc9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb7eb9632a4b23d9adc238c6d0774e014c3e08b7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ 
+ \ knowledge.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd42c5b63e3b2afec3f80381f09c3b9168f2f063 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10a9f5e4625a8d830fa3ee6b6ea29fca4340b81d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a38fbd44f71bd03815a0b9cca9a5a0bcbd0fc5ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44a15df0e119d1f2ba52ff37f0e717760794360e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5e807ac16128ae9bd2266253c27a2d8b3172a8d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da7057bcfcc78ba169ef3ccacc904cc3a8d0c820 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54ad5d1fc2253b4e325896ec72b36b2fc85dfede --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72c6347d5c457e9a11f51888b2ebaef0aef2b8ed --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_econometrics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..607811c004d0f31c3045aa99f8d74ca253b7d4da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7098c1373e9f54e93daf8b4366c024c94a62ed4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e9733b9c69f1e8dfec769ea4028a9ac3c51e9f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c255e82ff946f96c551d60cb88d55618510e4865 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d8526460b460a2c3a1f6af91d67b845f5a60fac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d60b32489088a21f2e3f6606793a9bb61b05b70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_04b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..268f3fda4b044452c89b183b58c0d34493834ebf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b0b67953dba3f778203232d4aaab61decadbf6c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e3365979dd7dbf1060ee035c9dec05b34a85d89 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_european_history.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7639b9a393764916648508c06109f22bf974e9c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97948e98d8ee58252ed7b0fe5f57a4ac2cabd195 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cfa35b1dd843437094da59b5b045575e69fb1a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a90b6520b76ab35b2880f012a1270907ccea8c73 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f772e76fba99f243f082ecc9924bf0bafea7f6c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca32bdc80458bf8e23edc9024be44eca79633b1e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43db99f185c8d7e2448223eed4b6e3f36808131f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..509e0cbac243765bf9587dd8a2349e0af045b602 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..680a0241ca99cbecb0c5ec1959d52443b61d30cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..424bffdb59b2bc66d1cbcc566b3b7cd184041fec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..847c233e248ea851c5f0d383ed11da33a5655e1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab542ef1507cd2b1fd021b40ded00c29344ec6e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79e2a1e38a7cf78593e63d8f504558e2b565d8cf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..edef94a6e85b3f61434e2ce29730a692efdf40ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25d7468796bf990216cfeb8e2a6d120b2e2f42c7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f139710b723d0812c4da73ab0f9579b7551c70e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_04b_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94bb6aa3ea5184d3de105749379139b90e000f8c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8fa3bd4b2c86138580208747e6af5f4f2429343e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c74cdd6da4bbd74cf4397f24ea17921b8cfd239 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_04b_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3cd247d07b4d0c712e32ed92783a5474d517377 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3637540cdba4b3552580264da4641fe35b9b6c81 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b97a78e09184a3a80e80b96e337ad5500d50f8b7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..595841902236bef9c9035c38cd3d3c0900b5ca1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4080df18829b4541e07ede28ec6517e99d023956 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0101a60ba42bd12663c257e7f9ddea24310b6e49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a96d871a6c62e230bfbb307fdf90973284a99f36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..833a40156d8f137584e60fdaa91cfc2aab922dd1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..304a57bf4cc880c6f419c5c951294187115383cf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84f4c03ac1068f4a1491953e55f006698504734d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d098cd9d38c672f1fe4c12f07ea4f2cdfabc38fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..6b8a5f1c3998dea67efc18a4a73f38c5dc0d5486 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2eedc101611042604c5544787559ea4430eb6b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79a219f4eddbb1be2905a2e034da75abd7d58666 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4546ce88cb60685472226a0978504b846b5b280c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1be68d40a3a0b56efd523690073720fc152b43e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81f8329d2a4eef9c846ac03d22fb1bb3c34c3453 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_04c +task: + - mmlu_alt_ov_04c_stem + - mmlu_alt_ov_04c_other + - mmlu_alt_ov_04c_social_sciences + - mmlu_alt_ov_04c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..88af060bce4988a473d1785127e6bdab8612b8a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_04 +doc_to_choice: !function ../../../styles.choice_04c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5d3912110b1490938e11aec97c0b16be215322a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9b85b6fea8534c0e95330291d7147852ad4f6b0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e10dc1d7b92a1fbb7d45a5e746abe4882cd07d52 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a989ebc7f2ae3e69333fd52cd1f4dd470646e3f8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55437c1bfc3f3194a16c1e567c5450ade373cbc2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75d9dc12b27c555dc804d3261ca8519f376a3e27 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb4390363ef74bf2dc0a4a594d41f9f5018dacf5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f5e1816047716f9d7c0cb626e2ce7c7a9037047 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85390337ba251ccbf417161963fe5506f7df2dc6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe8ddd58208fbedad60d5780c91e9093b1d468b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36f582d6ecc8031e0ba129e8297a4957b3527b79 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf32b53e3278640b958b38ee74084641503fc43a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..546c74b97df83f4d2f4730a3376ce2a44c10c137 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ddcf20e75d49f20cb63fe31638d7a1d7eadb550 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5431c8f749cd90bb08307e8e55153e3a3edc8e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..daeb3efc85481bd8d588ec9b4f2883debd4c245c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39912a7b9b56ace1e19e45414c80a369610ffe4a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9c866d7bab82ccdba1df9405820fa3e0d8a6335 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a04054c321b1c1e5630e928ba6cddd775711738d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7b6878fd7f0adac856d32d1529b517cecd8394d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62d0a144f660ed1a78409ae59816a8f8ab982046 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a950e9796c5acb7aefc001ea008aa9fc2eb0b96 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f1541bfe7be1bc06fae81dd5548e3b04f4ae17e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6cb89ef378ae83a1f14db38200603955efdff58d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f8973675e1a68b73af4d0bfd7b87fc6eb9682518 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e99c141b006bb9344610b85c84ce6b1ab049a0a2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8173caaafc68fbab553483bfec7c3d5aca0e79fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0060251fc30eb95100a36e6381e478005ec88334 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ad89e952f7833b0521f7e501ebc4c844e5cc3c7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebe72eac72543086205e4ca96e616e85f5fa755d --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ba0a18ab90e5adee25c7ed1a6e4cfd7a1726dde --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e634f4af8726b704d477e8e6c42623891ebf7838 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9510ba381526aa5ad04045037d0170434b910231 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..287bbbd7cccfe1290472d38930d25eb86d3b266b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c4b4e77b4a8dd80618d3c33f6486c77b7477fd4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3a269b0414b3a3d0a402b39ee4de9b16274bc07 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54a2680fe014f17baf7724d124d023264118c57e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..708c0c804ef565408c321dfc5bee7ebf3b04f9f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_04c_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7467c45e42eba99dfb2d5929dfc664ee6bd84c14 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3567ae8d6aa70b87fb6d480bb4efa0accc7d0f72 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f9686c153b68d76ca2aa8b4e7b47fb03ee2d710 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_04c_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4529c5ea40371335934b218e574cc88475023572 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f6f4da37ca65e392bd96f325ea6aa750dfc1ae8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b5a23b654a0f190fb2b7353b1460aa9149a2fb0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3697fdb29212b2a82d4862f4ed93e94098c0bc2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fee89c7ad9e77b19d22590a9e456725b6d0678a4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b9ebc08e777fec1626940ebf399cf95b48a06ff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36419277983202710858cad7326100ff20f28c0c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a40f78c46d0be43bf4b6f762cb2100ae67e30ff5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..149272c68a6d324df061ae7ee8789004caa2fe3e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f208377aff2df2dc7a4bd8ca7c5f4ce841d61f45 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed5b5314f5030e30f9b24013ab4b0763c80bb7c7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..53d538b4950577203f9a92a6b4598523f200128a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ccd329f9c58f0a835e92737329269aa6e3def9b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d29fb764da5e759cc47d9142548bfb5ec6b0176 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea188a0dc1eb0db221c9ca7807d889d3d29ea169 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ee86870a05d65d2ba5577b1789af9b6ed9a57c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/_mmlu_ov_05.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/_mmlu_ov_05.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da5f4d3cec75c66ae582f436711beb76494ae00a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/_mmlu_ov_05.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_05 +task: + - mmlu_alt_ov_05a + - mmlu_alt_ov_05b + - mmlu_alt_ov_05c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_mmlu.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f70d92f391cba458d38a9739b05664d449905693 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_05a +task: + - mmlu_alt_ov_05a_stem + - mmlu_alt_ov_05a_other + - mmlu_alt_ov_05a_social_sciences + - mmlu_alt_ov_05a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9cef98fe4f207c08047b6847761f96151630843 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_05 +doc_to_choice: !function ../../../styles.choice_05a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bbb4d49ff9c3edb19eaca7b3b991be83845ea4c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_anatomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..cb410f592e605e107e919b5f48457a4bf73ee350 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d29c90dd23086be33fc02322fd6a13525dcb283 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6f32e9af5e5669d3baf9b22e3db3cb9405a6f23 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..aac82a50ee1b2750cfdf9a55557ad4b2047b09be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b2e0958331915cbc701dba86db938a17df1d9f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1695527b83d3404907babb5a70900364ae5965b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0f45f3baca555b7143ce90f7a96c988b3bd4cb2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b65992b1347b6ea6e7eaeee4ac7e324f9130c840 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7167bb76690657e06611466b1d78fbaaa21731b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_05a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5833f09f7bd03c769ef99e5a7ac75da63cb3eca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c656c8938bbca6650dee746cdb4678c9b64c10b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a765f1641a095dd3e9f70fced44c9e3976398941 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + 
\ physics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..022c1e46e48309fd5570f304b8ac93cca1385a7d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..417cb37d44a56612a62c3090b6244077a00ca88f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89ae4af9c169f53b5f728dd0023ea6a7c28c1c16 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa186cd65c12695f1f54307ad5bcf946a64b51b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c3505e3bae13fb93de0d58d253fa88ef251165f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61452db3af0d6493f086956bbf3b91ecce58fcae --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..707280da8a7235cd94b0272d4716f6245d6ef117 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94e0a57981ffad5621d8c0fca66f787ab3699c39 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_european_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b199ea5828030172835d074e5fce88d5057a8c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d7bce73187057b05f9aa82ebd70b7f048f715a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c629273eef77088a0c46bfc034118e195d320957 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions 
(with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..964ac9d3c9eaa77c63eb540d20b29b3171f6a7c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3304eec03b036bb20060bb4c3606ed6280bda532 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..09b5d3103af2d35c95b91c930f906ac8aa3e3242 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a26783e6e77dd16efa21b4f305767690c4b95b7a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b495c0bbd0b72a5800912e6f8d9c0f76addea9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_psychology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a27e82e243671f7c03df25f8823dd3b085d3f210 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5eb66ff3dad30c2aa8e907a180122ab5f0c0215 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..476aab63e467fffaf78cf68cf1319eaa009c8c75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are 
multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0dc09bdf375ed2a0c337ae44dc07aac5aef9e37 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2e9162d4b411dfce4e9458b485b7d72e3c17813 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69e41feab7702aed7ee5ac78d9796eb2a2c41e51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_international_law.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b98e5dc8426b4cdf88188fa317d9c8346d3a6bc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ef16cb96827bfc0b6abefd6c3db3bfdde880fcf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49f2bda56fbebfe19ae73f77790d670fdd8970ff --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03412e62569cd6da6c3d6756d8a9054937fd354d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7386ab1c0593a2921026608bac0b7445cf3a984b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..506ff69b35f829edf01c25997d90c7e853d62070 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42466c937a1cd0cd524f8c71841595c5ee7f5d37 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e544a6e8c706183f70a30fd7fcdf3e97ebe64606 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..31d66277473d2fc19ee8435cc6e158bf8aca4ad9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ef6da023a0b146b908a97b06caacc9cb4a157a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44fb26b595b41f97b6db8d14bfe11713068b874a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_prehistory.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a2eb969760f5df63026a83de46bed50f39f6e086 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd268b2b91a6520a42d995fdb8bc77c6845f7491 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3137eee9388f60a1ff13ebb0e7d404968cf5ba1b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3533fde190ab8a398f7466ed041fcfa7a2b955d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5ac7be38f535afb911c23e473eaf2faa0907a73 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..548bf1ca7bb19d575df65160e6fc5f2dfbdcf74c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_05a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07b08985b374a2ba0cc62899f65b4bb6e74c1d01 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d70a37fb51b25685b374b8584196595db276c274 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c3161cb72da38c9849b4e002f7309b4917805d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ 
foreign policy.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a037bcf008dc3d9221d1004ccb1d617b34456da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bcaad0cbc19f8147fdd23fd7a42f8f0542b1c3c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ece20a9431f81add52379931f3156d82bcedf394 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_05b +task: + - mmlu_alt_ov_05b_stem + - mmlu_alt_ov_05b_other + - mmlu_alt_ov_05b_social_sciences + - 
mmlu_alt_ov_05b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..b14c6288575991c938598236b3ae1a712e50cec4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_05 +doc_to_choice: !function ../../../styles.choice_05b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10731bdae6f2b5cc350d8bbc1ef479902351e151 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5bdcddfca8bdbffc3b9f09596bfd4c79d3271ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": 
"mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41d9a90d10187e469eb1a15c640fc497c3cec84c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85e4117e039a3ab418776feea3c683ff818b195b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c905d180b11b2d942e64dc22f17f1f123d7486c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ 
+ \ knowledge.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72a3637736c285bf19231d329aa25956ea551308 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f29f8d0ec98cc55886488fede2bca189df348a9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..916a25a84c53e13f74a833545ee117951bc307fe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9d1722c22a7de41cd6567cd95856ff8e006b446 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e95cfb00c8214d2e0be4aa61d73e6ea3fd24302e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b5191104c29875d517850580817e57d2b01950a --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77334a826872056a6e62a14aa2ce9ab8d35cb0f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..778182146803b4e6b610d58015fab8015d8a1cde --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_econometrics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1aeb60f32b47b61c61c79947b72fc9664e310aef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58ed423bd0b0114357e2bdefdcf983ab527e1972 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..548e041302768f35b90bfb7ae4c56a36e1cd23e0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f07f5de89c52cdb70d6d1b8509457d30c7ecf01a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec77b6e936e93d5c3ed5a9560ced83acbb698b90 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85c228e5182a6deb6d0ea80637327cb73a1d4f41 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_05b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e7f9ef6065f5bae7a5b2eb355364be7c16f542a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07d40a6d19088647842e101088972a5c5d15ead1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74051cc511c20a82ae36da4d7097ee5664422046 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_european_history.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dfdda22630a3ae4d65b2b0b5d5624f0b8cd321f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30ee3766b33724abd30d75eb5dbe3e1b7f0361e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ac09e8c7716a51ece6e253ac9aaa7b84f9b9ce8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06ae5b5488bd9b6f1008257dfd8f8e2deec0218a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1565ba6658f12d94aa2fb8371af5de30cd9701c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a07de6f346ef1faccd96637b46a3d319127b949 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08cfc8bff4c860a4ead115520ee455c66563066a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aaa099bc0a6fd4aeb1ec36c6991ffb6694b2bf7d --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29152b5dc8addda79fa566f10b19c15aff5d27ad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54647dd0a5e709a2e0efc5c452cf2ea1f3030883 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..976621d2499f7829ca5c70a44eeffdbcf1bc0a9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a4668c61897b5a13699e8429df4ffd84ff9dc79 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b2626c3e68809ea9099188ac7a2f59dcaa4156c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d818f149799650d10d0f70c6f1c20ac38805ed25 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5eff9592ebedb5d03a3700c60f964acb17115b4c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1bbdef46b547946394e7ebc1190da001f683071d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_05b_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff53641886875de9b84d01e291cfb57ea0bd9c3a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd07a402dc1485b0bd624669cd57a790d3067a2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec286d16516f00bd0f98306a1ff856f330689b21 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_05b_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d1bd3caa48ce742da91e476272600b7f4220961 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1f2fc79e7509719c2c96b9697fde9c1428fd0e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4266c2a3fe975d36d4c6817be52fea484aef4734 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..286e6c7ba493688f36d6d6c2064605370f62cde1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84183735ec7624418372e58a73dbf85f79d01b5d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b1b746fba14ad166f73791f3cf7ddf42b9ebf70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d61861e9b1517be934cc2d9dcbb487bd7a47ef1b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c056a950d6d7ea7bd708bb7688868b20b66d2a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af36b4dd5a9ec8ed67e3f207a411a254c58e5dfd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8cdec089681a65dbb9c98bc3c4a5b01e8ea86ea4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30882bb2ea566add6e3693a73288ea7088609bd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..92b73a2a9fad6b01518b62ce437a27ab2cd256a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..727ae1a87a8348d3f9be99d88b8578eeb62bb361 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4685b4a8c0293628afd53a2d61d7621d1f6890b9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c0e69da75c7d8fa22f01cd27dbad43bf40c1dc2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d85a0006449d4a07b8e7b6fc040c0824dc708ede --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88041f27f6324db6399a31e1363aa361ccfbdd66 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_05c +task: + - mmlu_alt_ov_05c_stem + - mmlu_alt_ov_05c_other + - mmlu_alt_ov_05c_social_sciences + - mmlu_alt_ov_05c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..82259f03fb2c24835accbf48e538d654e835fc47 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_05 +doc_to_choice: !function ../../../styles.choice_05c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d6b10f840c9b2c1c84ee096d9b4a0e984a06fcc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a04c43d040a38e6770f8e383953f5a9c4b53a4b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f925929fc8f9738e7cf836694195894c62098b03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1bff1ab0d1d1760b50db966f348b36db672dc517 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abeee50aa0ed2c0e00a706f60d7fcbf1e7fd7200 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d85f42389917eed0bbbbe02db188ccd116fa8db8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd2f672fece7db182eb68dd233dc93fcf06e0196 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02d07d5ff695d217690e534fb62e694958f88c1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e18b953c2955509b58e8e69e7c0b627bde5645c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d68d7a3db6d24aee24a7d3dad69365b6bcc1dc4a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f200a990c9433873b680bd2be5d852fb6dacaf1b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa92644429740e0113594f4d30bfaa4154ef7688 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cb1151fafec0afd2613fa21692bce556b28276b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6253cb574d3b049e5ab2ae0b910d20b4f4498b88 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f75f1037b5eac6122061ae9dc786556bad651606 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f19f9904a6726f663fdb2225b51a601d0837f96c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8b8c1d904d77060f049de05be3c578e09b6af6b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..859939660823f633fcbc5e20eebe007331461a34 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3ba98abc5027934775d68acbfe4c5fbc260b08c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f25e96b4e4f1b90f025c3eaca6d7f5f2d890f63 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a715a00e89980de5a123256b9473da592336b704 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e9921ee82e7f16ea84a629581c629a7563d38cf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3523e372ff314da0230def5d1538ac3d7e72d682 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62010d8684b58d3c1b04751f9f9316d2ebdb3a21 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b1c566ad2b3f10db5114034b38c927e5c65cb19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db8370a44ed2350ea863fe359da77fb56295dc12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b2c003c5074d79d2c84fbfb134435a282b8e2cf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..697d7af6033c037089cfcb8f96496711e349b908 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e758a3a9b2b14308d46e031d1c98080e6901bb24 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93b2f70d9aa72b1306a8b03d785af14c60bf34a2 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a30146a0dc80db386f632e09a4ac0d82edc8d20d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b38687978e8d8bcedade9b48870cc5b48c864e92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b3a0478d4b68ba07e57d43aeb52d41992e4491b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d425bcc2d12407e80722702985a476dedaf0b537 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13b5a7163e8c67ebb013100b4343cebc4cc66258 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bbf9823782c72059198380332b098f17e9cadc5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..315af4b428afce8640e53a9606b011dda53ecd43 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab99a814666d0d93d12963591eacafe191301e27 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_05c_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16715d6dc3ff53c1ea8afb38e9f6d95dc875b99b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c31dba8a1ca0a7fc137b466931590817bbacf993 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc136da9ab82b2ea022b86f2e9d593850c7d4787 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_05c_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ad9a5199695962b6370138d81a2e8fdd4f208ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef20ece5ddf6c2b7ff6c14ac1494787b96a1d5f8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83d2ebbe267a09de4bce97a004d1ba2a0972ccab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cfb0845acb5a31d66cebfbad0462715bbd5c5cd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e015ff199848672d5edd27b2e1ec06c0baf3065 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9656cfe5d25036b84c85db95966772ad78289184 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ecbe7cf3fb03d137736c0da9dffdb135fd80527 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..355bbdb793ddf2f04cc8fb1f02c26dfbf2c90030 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c20f3c71c68fc62bd757de92c8a556379e99e77c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54cdb1aed09fbcb2dd5ce6a24127a8faa64af402 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a8123c2297cb44afe8c332053b362057a493b5b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..8ed204c1fff22e57d9c37a3e98d0b96401e4a516 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24f6ac6805a26ca1acc444704d5ac69470b09e3d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..027f984267585cf1bb025da1f412a54e94444e0f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d33a4fef6d9d72bb6f56744e755b8ae7886ecc8d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14d1d8362c2c86a256756daf3c649bf91094feba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/_mmlu_ov_06.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/_mmlu_ov_06.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5e53e739c3928b87271fe98c1216c5cbebf7225 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/_mmlu_ov_06.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_06 +task: + - mmlu_alt_ov_06a + - mmlu_alt_ov_06b + - mmlu_alt_ov_06c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_mmlu.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..718f8375c193581b9d7e3258952ef5d8937e524e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_06a +task: + - mmlu_alt_ov_06a_stem + - mmlu_alt_ov_06a_other + - mmlu_alt_ov_06a_social_sciences + - mmlu_alt_ov_06a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..5269f145f77a0089b7329ff2f48dfeb33f10aeae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_06 +doc_to_choice: !function ../../../styles.choice_06a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e50dad1677108a0f2ce93a7aafd0365c5ce974d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_anatomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..30b360b6963fb05c979d7084eac269f409d7dbec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a766a6e4b9950afbddb061e7a12b9e0a819b355e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f8f47b4fba063c7ceb445696b900392f65aad36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1900d3314597a486a31fb1a08c8d0da6521464c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de67fe09f0f5364052d110f8db97c46689e42ad2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bc652694b96172f49cf11b48594c12851506e30 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94ad7a721c444ffa0ef3a95e4dd55c507052ecf1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f536b141a3e4318811b7c95eade204871ea25ea0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5dd0036144ea3fb48a25fce0a2a32878ecd96325 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_06a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b27f03efbde4218e1098cd830c739fb9bba001ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03ac40dc38181b453295abe069ab6e1d4bab08ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b57076e3e7d32af180971a6487791393b04e9d8d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + 
\ physics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..479795ca598dc7e2fbdf25a452fc536cda6d61c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3659eea6d400431eed3166c23f2d7afb03697099 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..551cb12e434338b7afad9bfccaf5f726dca48673 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d796f8263b0c05ea3c648500b31cd4d763b121e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d86149e30ef85dc76cd27b74c5d58f30e14a1246 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67cc26acb51b8419366a6f23e4c01932c36d3c3a --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a4e2e0c182f8a5580bdbd69646cec5a3c282c86 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8735604d0830fefd6e3c5f08b35fc3fab787121c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_european_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d6d70087fc9dbcf1d35e5bd97d7eb2170765fa6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af9df9f64dc57183cca50df79e1667317468b9f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ef68e4461063b3667844eb03db8dba1acc2777e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions 
(with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f23488699c09c91bcd4f4a35af41242646720c71 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d67a8780fdc3f09268b07d91fdd4c66e5db307d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..4d571b4c39dcd5800700bf55ecb39f913c0034f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27c50c84a5790c2bc052945f549e03b9f4b99993 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7062280ea8197eb37d61ee7599846dfc0ab3c62 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_psychology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fee1a14bb5dc3c7a2e4d6161df0bf8538621e351 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e77d57da4a5bf9a937746b3633aa5dd94f843f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..938a13aa649d5931e5b67a0dbe07afb406938d84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are 
multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e8b111345dec5855efbc05698f78fb6416c9537 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80840d8de8787de0536df7df4a80f1f4f2b2dedd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4efe4852f768c7191bc53b2fe7a95832945a1e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_international_law.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31195387635d9714628ae43a3ea64e56fc136dc4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..724ac307725693e023f2bb16daaba823fce55f08 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc2b7f06499a7b80c4ab89e6495128bba07fbe45 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5c1cb69bfd8b0634f2ace8b835abfabd115a7ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..635b5dbc07068543bc76ee6871dd0cd13406c9ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0100fec18ec8d1686d7ff901d428111bf3d5fb37 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0517296498c2702990730471c5ff98dcc93979ad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f53f46b3135ec582e35098ff31ade7aaaf2b79c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..c551b61adef01675673f74613db12fbd73976d18 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..63b9fbf5b105193ca1b30210a66f38e514c5e53b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..942ac9e645744b40eeb81b8434c790d2c68bba5a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_prehistory.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ca6b4e37fb7ebd66a4d6e2e3cd0ef6db82015055 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fc815a42b9a1938fe3904d4b8e71c3acaecd3ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed08dc12e43f319aead79632d3a98b934efbd62a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8542b318b43c64b1ae4818495b25cf9c6e40e607 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85b49a340dcea740fca118008b9e937cc51b786a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cf7dd2a028c8fd39c55e1c24060af149124e5d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_06a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fe72f47155ebeae931d3fe1357f27fa5b5ad199 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c034af428b721247479237ca5febe14e59a3232 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f915db0a41f8a65a6c0d103555561f8b36cb226 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ 
foreign policy.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db1a318bf96a852c63959a0f4d670df184d8abac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b39f57a31afea7c93710852cae8764dc22277231 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76001890985e6317aefef3b5076f4c1283e4de2b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_06b +task: + - mmlu_alt_ov_06b_stem + - mmlu_alt_ov_06b_other + - mmlu_alt_ov_06b_social_sciences + - 
mmlu_alt_ov_06b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc6498457ea95f6abb01ab5c2ed2274c920149bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_06 +doc_to_choice: !function ../../../styles.choice_06b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6179fecdd6ce2d34a5a71697ae93a1fe4bfe1631 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dc6e13e64bb779b89b5a9739d26846b49df7ffa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": 
"mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3cb40081a07c426acee5aaeae9c1e1964af51c8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73bca569b35d1c6708ccc77756903ae5bc1248c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..004d1f96022b4a739a836f0d9b959dab6504f789 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ 
+ \ knowledge.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1f1b9d65191db699f4ae3c4cd93534173c65777 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2cb94a4da4492a349649e97bf37a19f64d65a73 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dafba5c34e4c753dfcb273dc45a3965b7f02b2f9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17fea247e4a151578644b9c11c2dafd29c236045 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b654b53d2b455bdad05bb26e2377f14a5f28f53a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3e20c548544d5078f76d41e4eff56ed8493a32a --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b59eaf74543fe4368f235588f7de4aef370dce6e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17a7f402468f8dc4f8a628b6c00ae842d0a43498 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_econometrics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..67fd25f1fd32a92a0f685aed74b804bd72667e4e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09682ca0c924613d9586b9c3e412db29a242a60a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..332a769578f731355c96e3ff4136bb593abd4d56 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9812ff4e1cd1c2cdad743d8da6032a90f842d422 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af72cb2c5ca3076a01cbd64d2b7b5c72cdc925c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ce58fa341e6d12263ec1a628ea147a2be0c014b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_06b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15f2a06c5d55717861640f0ca0b28165af880a0a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2d857d46634303d64c5eb3c8acb66d236ddb8d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51491ad2fbe0c6246ffb90ebcca8cd1b18d818d0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_european_history.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3bdd3a18f811cc251ca98f4d2863dfc0beffb1b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84250124384eea6e9089a6746dbbfc787f343fb4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b149fd51dcf16168ec5c2b84dd3c1ecd7aca4e1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73ded429ae8e756b1d92cc9b19cbcc5a929c5c0b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e26eb966bfe210a09aabf460a908cb9049c15fa7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12f27df9b9ebb06e6eb72b9f29b770c6d9711d8e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f8c4f3c5bd54748e6dc021cb95a2832ca8c4c92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b3547ff8819fa323c8f74412ac7664ba6ead0f4 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..613b44a34822dcbb038709d22d1004aec77b9460 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed0897c1c3af767ac410740748521d60e9fe8e86 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc4a6b070f8944ddd1c1ef233d30d956acdbc3bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8187a12668848fd613cb9ee7dc1a5a79262bf64 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..933f50b8108450749d7c95dd0338d10b168e6d05 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae7488888e95a5f847305b3b8581d14714cfe876 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51de0babc30f5969919380c5ad9504fcf1da7cc9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cbc36e6dc3defe464b0b8e1cdb0db4061e2877c9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_06b_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0faec6dd85e097a795019107adef4dc41cf3bc26 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c690b7c035428b17e92d759647e7caa9ae18c41f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..169412c6f36256c249169b5c625fdbf89647d78e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_06b_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fda3a54dba583f50aabc4e4fb765557387b002b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5289c359df1760cc88a2abe55c7533dae262afe1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db749c8cd8171ce0866a0f059005456b0fb45520 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c33d1bdcfe6249a79f977a4ef0a9f110bce0b5e0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..247eab866d6875000dcc00a8f645baa601f05877 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb87bb719a1041760f9b890ca227263c039a330a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e65e27de11fe344149c27e40acb069c09fa0612d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f85665340ad36f1ab3926fc6ccb93b5ddd3c9841 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9ab6259e61238b76c45f17b57627cfa85cb333a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08dccb1c68b253a3d3e0d14561555fa8a32bcdcf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ac634d651069fa9fd55ddfbfe97c58f375f9a60 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..488ebfcf414df6928df84e7e3e37abe6b03edbe4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f36a613578f2ab51226648c9cf3a71d41176b588 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c4a543ad7e6a2a458dbf04586a137804f798d94 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a11b29e3b692986c0755ad0382f93b77a07f621d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..497b55b13c335cbc4b1ea05764d2035a106b7ffe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1397600afde6543b658c8bb7107da583396998bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_06c +task: + - mmlu_alt_ov_06c_stem + - mmlu_alt_ov_06c_other + - mmlu_alt_ov_06c_social_sciences + - mmlu_alt_ov_06c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..414b7b2580977fe2212b16f2ab60e89edcb69a02 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_06 +doc_to_choice: !function ../../../styles.choice_06c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69c61fe4a5b33b4570c4828b0f3549a3eb7aa9d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42b301787e514897b409dd6ccf31b5ab59c5b29f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e984f02e9f674d27c72ab0d2de5c8cf28dded59 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0ba2227c740d4096f36a28cb606155ee9840328 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..217897233a9fbfc56aefd3ff5768385a5f0b6929 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c57937fdbb43991d1dc7e138a3685c9a12544421 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec529c9b959cbab46b309fd5112c3e496581c341 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88c3565cbefbc0dc7efd3fbf416c80565244cefc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..563950882cdb0be3cea81a1f56b71234ff99fbf2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1584be7f3fd83e18e2d6d4a94fded720b07bf75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07e4753cb39853a309d57cb06ac112aa016ad9c7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f48cd9c4e47c2441e9e1fef5ff994a2a77dff21 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f66d241b80a3bbbb319b0a8e99b4281e3b2545ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38b1149fa0bd35a4178932d5a1518c96c39eb6ae --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a48607272c2f0c831715ce9cb7bba73447d2c43 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2db4ce6fb6163067cfc726bc83dc3b5042f0c27 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ebec05769dc4e7c89c04d327a19b5db6c459127 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9676d8856f0c9db4255ccb51f86e97507442f358 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30fe645b620327c5317b1e7e81689c52dfbcdeda --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86251d8ba9f9c36f3c0ad7a8328de0b1cdcd5c2a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab05057bcc0de0e2a4f413556579330c26b984da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2c07f9b1f05ffec9bf2dd3d878506030d2618e7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0bc8c6a76056e55d6c62a59db5ffaf070985090b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47308b257b64e101d380a7c5afff3c70724e97f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b16874786f35170cac69284fe94f84ddea6a41f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b950a7a08d094cbe70accc50f2c7005901e852cf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38eeef56a2b8c35a3f1b3bfb1ea022e693ef736e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15da1569c46de39cfd92623b3513ad4fee74c070 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..975d34ac72ce17e31616fbca03fff6a0ababea64 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..211ff0d8eed932479352b7044149baef0d5af5c3 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ddb5bcf2a4a973fe7258f43ed5e857fe437d320 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8900bdec9a922caf262877a280fab70724350373 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f8c03c6a624419504f2c285b34791d279fd5fc29 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..809864afd6d5920f0782fa0be60e91cc8278ee05 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5fdd16f924ae5fb6e9b408e7db2fb2e2a91acc8a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70ad5e92446de7fc7a7bf5d37d4d227dff16fb8c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65822b08121a6dc13c1ff0ed620ef99640b87116 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84ca23500241b9c096b856fe040f427bbed263ce --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_06c_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f442a5ae29b31d660a71f175adbd160a75be7fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fd75423ec5ccbc0ac10d706cd0ccb31f58414db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b1ae607f6bde199c357732de14c074de3d840ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_06c_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc5ea5d64059317933d59f2915b67a6fa3f9c6ed --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24e16395a7413dae7c10c56f4521b7ce1fe88c76 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c267865e330fea30397a5df0e487f911de0f150b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee5bc7da453e4631edd0004f245250cd446b02e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac32db390b7c662ee567c8f8d501029f1bc8ac69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8ed354826ece10cefe46663c2f03ab171a6e1df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce4120831b621edbc4130fe24aba96e2ef83473c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca648a03de5078fffc9ed4f86d7f97515da1ba0c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57a16e010456d790bebadfa67dfcb02ac1ea6915 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..728c33259a2288f7cec5ea71b91a9e0f77c5436c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a0c30d5aa009f1deb5b7745fb62441316afbabe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f396a27f005deee2745cf1d3625a913edf47895e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab9459774f5b5be89acee3e387bdb9af09b86c75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..118e3334cbabce600ca6f589d348b10f8cbbac51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee29cc4f38c4e122bed40e72392059a0327f79c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c48c15b6ddc36e023ab8257bd33786b9c25e9468 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/_mmlu_ov_07.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/_mmlu_ov_07.yaml new file mode 100644 index 0000000000000000000000000000000000000000..844527341d76c6f43346d15ccf594224f2b3c714 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/_mmlu_ov_07.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_07 +task: + - mmlu_alt_ov_07a + - mmlu_alt_ov_07b + - mmlu_alt_ov_07c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_mmlu.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e7885ae7b95806331ef2a2e7f4b9df98a04f4639 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_07a +task: + - mmlu_alt_ov_07a_stem + - mmlu_alt_ov_07a_other + - mmlu_alt_ov_07a_social_sciences + - mmlu_alt_ov_07a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7932f22a572c4d1b6fd012c58fea3de97ef22c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_07 +doc_to_choice: !function ../../../styles.choice_07a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3753f10fe8567b6f665328627642ae6bd76e81b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_anatomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a8ce274cc7ef1e2f990ad1884170e4165aec7d7d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..daa8c35853730a44bbc8da245643c361f4b1f7cd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30f52843cec43facfc597422621a64dd5ed754bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..c5bce59969ebfc6324b13dbce72ba7dbd8c34907 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd25e096dc16484b9f4a2e56cd460bd18ddacc00 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7fec3a7630f3c1569eae536ef30cfc3e5424b84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3c5db692a6d91bda42ff03b54ef89caef151fdf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a30111e0b8f2e35b9b284d03a4c1182cc2f3217 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2148d7f7682e491c106a9e321fb4a578d5954884 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_07a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c34ca5f3a9714947e9f1bb739ea49aeb7d3523f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0aeba991dd49128066990c7eebcca0703500f6a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc922c0c5772b1eaa29f272b007674515c3c85f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + 
\ physics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b422078cbbfa94509185d3b105f0be440a5375eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5dccde22ead21adc34b8a90ee365da07ea0a023b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b28774fe256d014d0f67e076b1c1810e4b15c3c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7079d212eb95a4374c298fdd541e13ea8e3a5553 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20adac4bfbb314216ee6b36cc6624ab1b4b96f51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4ce4a9fb5827805a5998426cecc965c327dd57e --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..feeeb16502a059d749622bf1dd4eabef27929316 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4282ecf12dccff0869a33eef75ac48d7f78004b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_european_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a66e6345fa20b242cff5d5c9951e4b34da73b47 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43b64f901e2afe81faeeb616e82f1811cd109fde --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2c019dc70bb002c51b7c7ff8f444c4c9868ab70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions 
(with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89d58a241272e41e9b10496ae42f3652ea787e70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49cb05ae07330b844a3be7c8a136b0766b7d817d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..bee68ebf28cf3c1b159b29f9392961cf677bc199 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70df43b2c07add4e30d43ff9ed117d2d64db79dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..919ea53bef11517808a84e25fd9799cdb0185428 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_psychology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a76715c53e3faf57872430edbc1ddc3e75482b51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28f26f92caee585890cde285909c210d7361493b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca94340a23c6a21f7be75b20b503b85aa424b5fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are 
multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f97857ef02b582dac3a9f651e58c5f73ea0d3bff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..389af64c1eb5840e98bc77c1c8de389a2d3b8bc8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0cea27098023cfa5cb20a1607fe2d86df582cb9a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_international_law.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e274b3b377b3ab49b4c0fe3479fe2428233a3860 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a648526f54729b5ee8ae7bea99bbaf8d9445b67f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27f5bce79bc0baeeef04cccb22b6c84e1cd72f68 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dfe35e0a5cb692ad93e90570ea4bdd6dbadc984 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9bca36929ad500f76867aee331d147b2356ea28 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72f7652366cef07a9edbf98a4063fae7ccb5db0e --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9db2879e3382321a1c78cdb640fd1bb46c67cd1a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87022179df56fc96127fe20ab77d6cc7d0528778 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..9adcb5edf9dcaabdbd3507644b03d070a059bc20 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc1b458e66c4a7a810cb0ff3461c3aec52d00af8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6732247eb8ecd08a01f2e3e19534442b6b736c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_prehistory.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..c0b82dd01d9ca67608c3a91b57a0679c370b6913 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86442333e928ede3cefd7a85b25176d8312ad706 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..229232657f980d02736390302791a6cbbc5c6c9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b82c69371c116cc80fd2016f94c1791d06eb12ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7099ca9887b94b97ca9150b6119b374bebb82d93 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62cae6ca835560f1e0af6b50b26d12b049198cbb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_07a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9934c8680981f861c380087f2b3209b08ed15082 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe3c7c4f229099b895c14f4e80ad46508d4a26f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34c3a50773872a25e078c7bd10c22ea0a523664f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ 
foreign policy.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcca39be97350ec07fcef3fb33ee47bf26cd64f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1285fe7575578b1e1333f8992c8c3fd3c1d1385e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a2893917e8d2111ed08d0b6fffc6f31e2b8dd55 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_07b +task: + - mmlu_alt_ov_07b_stem + - mmlu_alt_ov_07b_other + - mmlu_alt_ov_07b_social_sciences + - 
mmlu_alt_ov_07b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..344e8b4936ddc2b519e4be9d1b115b4021852774 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_07 +doc_to_choice: !function ../../../styles.choice_07b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..547cec13dec7b30045d0e4b8ea01d815033a3458 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..449d874d1537bd6be2bb3e18a1ce0f91b0f79d8e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": 
"mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1db708b30e32a76275d1d7c583896a4864be1b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3625651e170691d75388fadebe58b1c20b70be0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3efeccc0026f018e76855b694e71e9405572d5c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ 
+ \ knowledge.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d363aa61a68ffdca9aa6b7ea4b9a8d31a6796f5a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4531065c200660e8b230c49310aede169f01df85 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61f6f3feda7a3e451657a4a7f6c09fe03ef422e0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..272afa307bcb14732c528dee0648b93e7079212e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a57600bf59d03bf27fc66f6813963b049507f58 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b1a4fcdb4097ffa6d0e4482380f21e324671a6e --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5c573a7db52b9a02019dabe6d0d65e67b55cd5a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a26c2bef05fc7e0a830d837b9e41c18400bb1c9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_econometrics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..232a53b090a86f908556a94bbc3f327245ce9a02 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..604ff995708b2c01cc1eea4ac42935a69563d307 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1fdb87d94c5ea9eae22324c643df457399ff37a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6795452708d5b9bb0ae13c4690bd21d355f41525 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a420c03059ee992ece394c471dcbf7a216e3c51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..168b8ee5b1a1db58126823c04b1e6dd347c89424 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_07b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bbb34bc6328b6ddf623f869973654d3d681b72e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b34eb028ed3214ffd2e2adafd7dc706fced1f9d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5000f1822633e978feecfda51914b8a1d73d0919 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_european_history.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..220495c35b1faba82e48c750497faa0f117ed2ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8591ffa8b26dfb569ec1f41d17fcfc7cae6c09a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7f126f9da0b6a1087ed61e84b3876395fb9601d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7ca038f838d479729f29a70270dd40c5b4802af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b4748ab255bbad97217d0ae5cc4a498aa68eb33 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a71482548539c0feafe796ea91e0c221042a4f9e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23f74d98fd687fd04be957a369af9d5d9fc68d28 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5598a1404329140281d211bcd1ca9d1bb298c674 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75d0cdba90d46426a46f12a8fadbf876d9019524 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b36f15e60a3689fc1fe0d9e065dd5598c7b0f12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e86577993633f0ee49673d19a1f2e58a29296b57 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb8a55f38f6762db3455e0ebb0d861538d85f74c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c141de2ac2e7d46285667e2a9d432c2170ab682 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..653a89089311f195bd1d27e30cb3af2b5c7ef759 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..585affca6e6e728458f106ec07883fe2a3984f5d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..309b04f640c2b04257979ad901290a78db0e0440 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_07b_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c931d2c28202b5882e25ea9fbd587851460c795 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66c302b4c4b6b3a119dac291b55b01e64bd8ac0b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98f1baa7857f7730256ffb956c7abef9636b0d57 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_07b_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a0e7f840ce239da7b013782aa9a855e204fdf9a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f0acfb7db4c0e41265e6dc96ed7125eecae2d47 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17913ee976475d3288f9b40ef4db1ad53d0c2182 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..697b66545cc31690c5ab3514fdbbce235f52fa1d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fdccc40090f25739c5defdc6097229c0e298eac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d1d6a533d4132a3856effba7affc2a94219d4d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1499c6c5832592406aeaf915bbecc73a2c266862 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a90140dc704cee1d0afebb594b6808c700284e6e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad89f65af98a2caffa1e94930bf07007021cce30 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fe83f29389d63b48db3bab20a3243774b7934af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a92339b506400f5b04f4d6582aa906e35484c685 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f03ac5431eb65b628d5c0f12f4f3e26b2ad101d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20c98b44d7dabcf171934efef6168349557cf94d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e9f9a590ebbad28494ed8113fb3859022ce34ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e238d2cc025b4b3532260d07191619b4a15d1b7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5698c38f98422038073181e9f8f2ee3d930b1a6e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df79e5c8a843ddeed1db465d732763f62e3e7c9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_07c +task: + - mmlu_alt_ov_07c_stem + - mmlu_alt_ov_07c_other + - mmlu_alt_ov_07c_social_sciences + - mmlu_alt_ov_07c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e92a33ebaf9b5f7868cb57772568addabe12bd4e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_07 +doc_to_choice: !function ../../../styles.choice_07c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..686388ce756d5776f8fd9160b83192872083bf11 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9313eaaadaec47549ce344fc625b9ade150cb5e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47b6704509f282919a2121eaf9ffef2ddaa71f0c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3097b9cbc43e10551a2103532fac7fdb0d5f7ce --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..743516b6a3e3cbcfaa12a1f92bc25b480aeea408 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d79b192e2040b892ff5522cf63edf220440a0865 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c082e4d2fb847a60838ac24cfbf7d71110f695b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8566fed2289dc2dbe3a60c05180e227674709fe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b765635feae2b4232f35c4e9d932ad8c9d56e6d7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90eb026a436af8e95aa1db47c21f3cc49aa4d84c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..311a60736bcdbad54ab4f0c15264c1d811bf7fc2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f63dccbd4b75392f625910857cb643c80c42324c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef2a5eac7a978f0494f8958dcc3d21e87ecd010a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c3177107e673d7840d111cdce3141fb49876ff9 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce3b5fed2fc41f6468892fcbebb7b9911ac8d714 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86dcd1cef558ebbc9cf933172205d3114df0c9dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebb405230013637e64312e4f91e472c06ae72fe2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9cfc2eedcc3b3e53139feb9d827f4612c37641f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c73d11f43a000d08fe183bb772e8200176d44fe9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5063b3f44d54558a9e8a1e87b64e12281e467ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3f3f7b631ac7cf86abcda2f7a8faaa03c29de80 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a3647fbc9793d5c77a07b220e393f90e6d9ac31 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc022f25225940fed14c0b381cf268ee1e4929c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a205499dae7ae9481949478145d458388a6b973 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84125faa74784f97609a18dd192359e2e73c5118 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..147f21fad11371576222f4571ae77a82b8906322 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d58ea11a0b0248e213e51c7adbaa40b9476a934 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59ee2a8314323afb7befaa311c0109591b4888f2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa344a82a9aac8263c085b5764f77537e2ee15e9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..689675fb8ca87b01fd44bf385d99c32038c39b29 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..142e62940e88654364caeff9e3b95501af578d14 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2113b49852fa017673f62ef5b026eefe0624e6c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22fdd8063c8b47898c0d3275acc6d8aa4fd007c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93d7fb13ac6af0b1fcf497913d386466a6301add --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acf5f6f84f3b234e680b1e9fc4b4540c75d4012f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f77c137ee5e5dc7bc1e6acdc56005f65c637f47 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..139b184d46fad8b41d5b715eb550b0e865aef401 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8559c91b3e26306cb46316b863472cfdfac78d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_07c_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..532f6884215a507ceed8bb474e541312fc304e26 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ce073598b04ae94023b2a1435196e6209285e6b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fca68ad10a8e5c8eaa2187f7bb21cde945fdbc11 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_07c_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca70e3bece75affe14d68fd82c6b4ebd4e5c362a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e79edbddd9f6dce29c71171d2106038c97d861eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf0d4b526398bb357a7038c4684f2faaa91c5314 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46c5c29b35b579b69250e320d1db5377fad75b5e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12f53153b4613cd8e4ba5d21eb39148c2476897d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f6002af15215d96ded68656e05a1513ab0c630e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c2f62868b35065c2db3abf54c2197802ee923bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..709ca844d0823b80d3b360c916bd02b8253277b9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea310279e98e2c2f1cf557b06767a53b2733e047 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3cbb0207db65f71487f25413f0d58cd3de5a0540 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87c9d318e22f4205e90069edec0a74ba7d671d49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..757e716fe641f9489a2d8dd225fb760e7583afd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a1244d569eb57d30f889f2e7861508c3afc298c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00effdcd4eeaee88fc4cdf56a290e700809dc474 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..238eb61dc3e0792ed2646581abd4b34c11c0c6fe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e1df0e034bebf3409cb5fbb2d412ab32005091bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/_mmlu_ov_08.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/_mmlu_ov_08.yaml new file mode 100644 index 0000000000000000000000000000000000000000..378988db2e084ad7df3bbe2ad470723917aee771 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/_mmlu_ov_08.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_08 +task: + - mmlu_alt_ov_08a + - mmlu_alt_ov_08b + - mmlu_alt_ov_08c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_mmlu.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..c62eaec2685d9f34fddf4bbc84db0fe234b7a79b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_08a +task: + - mmlu_alt_ov_08a_stem + - mmlu_alt_ov_08a_other + - mmlu_alt_ov_08a_social_sciences + - mmlu_alt_ov_08a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..544c5697b63c7dd38f579a16c645b5702c03d6d3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_08 +doc_to_choice: !function ../../../styles.choice_08a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9d2d0567ec87085d04ec019061b0a2106e7082f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_anatomy.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..7981115c8465c4856b0d81c26576067e39d43d67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..690b3e185ac7d6e4d6c70eb8acdd0f3db41a4c38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80304895140b3d1cae1fb8e3b45e16d99f7b2eae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..fbd4943f37c03ff3255404b7b2a37acb4919cb4d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9779430a282e4c28be09418c69b789e92076abce --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a87d02a6cff075b893421396449536d488d20915 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c65f45587c23b7997e315979ce7e5a814b4a0865 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..298c9f74a44a91fc642fdbe70da1e21dc5781b9a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4efc1b50ba1c021a5c63f0124d5dd36e295cc3e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_08a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13dda4814e203cd4e196958e132d75999ffaa404 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..deab7c1558fa63137d2c452e72c7ab45d70b2089 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04abf2533730f81ba5c8c260166cba7d4b7aa0c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + 
\ physics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ea7b4eb0f9673924183c6cc3bddebd11634d8a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f0cc66b8e09d550f629871de3a06fe0c3156a7f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9f10a846d4b9eda161f95f6864dd0bc0b0a30c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba76ba4b5ee8210411646e8923e01f327efb9f53 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15e2cef9502fede0e3c38f8e7b8c6a6c29c174f2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..968d1f62893627ed1df2a15470b0f572baf28491 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdab33e40ae19fe66e411d43769bf568d1f55df4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac659b8f0f4e9eb09bb9f6c5acac807291d23f40 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_european_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c89afe32830505508bc0808eae19ea8ead2f85ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97443442fa455b43a81d49ee1352544e563885c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37d4c3699be7dc3ed35b530d04264fc630aa5dc9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions 
(with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe3c4daedaa2ac173c10c4eb2b9a495f6c11648d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5de10c18237e9e4a8210fcb7c52fcd6216762973 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..5eba0428635dc7fc39faa13b9b79182623ee7652 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a61f7c971e24df7bf4ddcecf24fc2fb7e8c1bf8b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88bf97c03bfbf625fd7633c0d6306f74a745e15c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_psychology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b0cb33c388b6c9f41a844e68d566b2a21f79778 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..277c86b7d10d402c036f78615d6f2edf4d16821a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d223949a48b139e83918d4067774aefc7a33477c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are 
multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cfbfef6fadfe9943df2802b56da5d138079229a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0867836d3211c2f939f85b6bca8249168765f180 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31362f293407e3e8b1aa6ceb522165c49bdd6685 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_international_law.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcb7b715b6ce08def2bb47e62a9d165fe10812dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e5e242c32d520900307557c2c832c61428fe938 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67da0afdb919098812b26f6098e9bf9eea61d5f1 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc962a538f761a55da1552a0d34ebec460201492 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bfff977de00e0ecc1e29f48cdc30286737ee6536 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d68da8637306395e1d29931d882f08d2a332919 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c131112da6018ab199fc747ec0bceb60dbc25524 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7daf045049b92ed0cef24a344cf049cf5353cf7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..848b4cb11cf17128218c4c0055fa8776520cf320 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f3dd18d6d17758b09e44dd187a32d5b6c017464 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..099f15f0162b4f231687708c6a2925b57e500d9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_prehistory.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..02bbfa617f3ce9e36fece8166ba02202ee27d597 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b39e93c6bc2858f82217833bacc16befdc3adcc8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b39ebf944cd438e7652482c0baa5acd081cfe64 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c70ad68bbaf59870a015a75a3c478e80417033b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb06ce62f4264065fe68c41321579430d28ff885 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3fe438c28340942cb6d7bfdbe1d8161d5a534b68 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_08a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0794c491a3a44bc915a5288a537fe9f05b13186c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f9fc02a02c7d734b246f0365605ffd0b54af583 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77bbe90a2c158c2c90ff81f49392e853bb855b22 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ 
foreign policy.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da2aa7612fd46455470320c8456a6e87b5f6f530 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d636c347328324689c5c4f04d768dca03a117fc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f872f7cd1401651e316f075327292052d2b01b81 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_08b +task: + - mmlu_alt_ov_08b_stem + - mmlu_alt_ov_08b_other + - mmlu_alt_ov_08b_social_sciences + - 
mmlu_alt_ov_08b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd9d94dfdcd99740700e37506fe03864c03b1bb1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_08 +doc_to_choice: !function ../../../styles.choice_08b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0fbe9c6b745199c062013993bd3356f5283603b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58bfd0b12bbaac4b977a17698ac5c7e763c261f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": 
"mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9eca5e1f0cbae3a5d2557939e686187b99528f44 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36995b7d8706f74d4b40f61c0d5b32a896d32353 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56123d3ba6b4f185fbff86c64cb6c39ac01c99de --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ 
+ \ knowledge.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13e57ee3445dfde605bc15bf3ff3853f6bd27676 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9734b44350b22a902fb5acb04f7a06117d6e2514 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc442741187b658ea9de0a0e8fb81bd58bbc1d67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d89509d5a9fc546a9cdcfee43b182b128021446 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19dbfc126ff247d6c956a5cdd45c08de2077e053 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d73834971863a4f9ca143b6df05fdbd4b6f8a5d --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c02dbf1493b83d18654e713db12fd41e01ca527b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..185ee5bdf402f21302b1f780de0a677d1b9d31e9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_econometrics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ab318dac5d0200b5208dada3d1558b75cde80843 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1efd8a5e933fd66f09f85390b97fc98d5c54ff27 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f36aeec4c50866ba7cde852db36da6035b970461 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5556b88e35fe6f347257d632994d589d881b268 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75cd95122a0122b2ebf1acde777e877ec66b993b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1474b407e5afb50540e42b2071bad8173a8bacfe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_08b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..060a490e9923b80e695eeed9743fb209935eafbe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..355109d269c09e1d00dde98900c8db31a5e19090 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..360c30d8c54726ef9248a60e3416920932b1e519 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_european_history.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..693f58aba92289f908fe1dcd8421a2f325dbf1c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..999ff8480d9b4547d35058893f3596817326dfed --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18d61f39b2a30498fd6cefa4421b17dfe712af44 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3f23469c63c2c51ecf276640fcbe63533d3f77d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4bc1bbbe7cc0e2664ccc67ff7c08d527f5149df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0659a800b60d6551cfd45370fd10e2aa0e9665de --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..803e8b121ddc5fd4fa5896e4c1324b8ddfa173cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5429f118f285a085eb891da15c7c8ffeff238fbb --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..495f7b79b4e5e784b645f94e795cfd2b541eda2e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dab44fa4a2f349e6a0e2f99c4d87d221c59e7a9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5b9aee9864a0234663484552439a9eaf163be56 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..213f13dff49052fbf713a29b6533205f81317c2b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cfe6b5cf32e447aba16f3b3bf343c5d3f439780 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..191d570bcb95fb2b8199edd7d3c0f149deffaab2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..331e54ab06f8d72abb2360a4171e6616d651d5e9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9232faca8b7b6d158c27f6b7415e7a7a2d4faa50 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_08b_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89cc432b30d101965a00546177d9ceec8ab23448 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e06b0100dddcfa64085710713b18aaa594163cb6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f06299df9121b86b0471ae84cbe0313f0421572f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_08b_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10456aa193914d7ac5a13561f3f259bc893fc9ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5057f1e0bce0df0d4d304bbf334edc4b0d79b100 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78ed268a882a3d57f678a283736e4187ca6436da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89bd98f04d0394e61140d0fba0718243468fa0a2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8566dcd3f93c5d78a2afffb27c2f137b0528dd9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f34e833c30614ac5c9fdf9798cce13d608fd4156 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fabe6f8018ffcbb7f13bb186b2acdb382249128e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dace16a872e36f7a7db29cfcaf6843e82692e71d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b49a0b53565a849898c4feb4cc6e5b4790b49d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c7858935d297d722ea436565dc760996f4272f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8401a7704bc08289927da352ddd7b09ebb739b96 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..bc4cc164aec5290000e39ea89fa5b58113ab377b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7124605d189d79e66d90b2750d6c7600ed1eb3b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11fcb95f52f063f5e6a691eea3a8b48495695898 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de8b1fd058794da62024de595d221ff78d62a2ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4d5a5d641ed79c894e1d4380c31baccf2c9f05d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd6e248bf265209c9b7884d91b9809450e371050 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_08c +task: + - mmlu_alt_ov_08c_stem + - mmlu_alt_ov_08c_other + - mmlu_alt_ov_08c_social_sciences + - mmlu_alt_ov_08c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_template_yaml new file mode 100644 index 
0000000000000000000000000000000000000000..fccff023927ce5ef92b879c484e408bf71c89fe4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_08 +doc_to_choice: !function ../../../styles.choice_08c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28cad61625a019dfc25fd2d531d523d8f6d46f80 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..118593a2629e13dcc8efd4e601098eb5a42fe35b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11dedec1d513cbdf8a17ab9c4dbb86aeb27b11c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49c7bf7fd993dd6ae61f52aea99a51ebcd4007c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8722c2f86c98995c90d57f1a1f93071cb48174c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc4ccd5ea5dfaa83d6d07d570b75a6ed1d55f7f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4690abacd6e88a49e3059a36fa7589458cbe012 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03d4edfc1dc743806bfe882c9c47c5312833f21b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..268fd6dcc7037ba258999dfc92ac0b7c90180304 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb5cd9dad877399e5bef26b4d13a7535a985f977 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1da6f78a258d2472272c33258c6ea983776648d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf299685f631d09f388ebdf18a5f403baeb740b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e013427758aee9619a700d50ec428feca9c1baa0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..184e057e2435e7c9b7cf61206c258e45976cabd5 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a539547d8c5d85623ee3acef4490fcb6e049a4a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65e9f81e88af333db49a319a13ce232d46ca1705 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21ac6a0da788dba7490fa6af9e4a38b22a66c240 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0eb40acc1441bac87f3aaf3bb95204082482056 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37e54dd5490d32e0b752fc2e73d1d48e69020cf3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de1c42abfdee5d1ffa7f2d0e281f9c9d03435e19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4aa54c47498b66bc647748e4098484c17acf0b7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bc0048484700e86c526566e651223b67fdb4311 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bac6e8122b7b087cef81ddd0a803ee18a8092bd0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b9de7a326529902e1bb5eaf69186999edbad463 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7036e9c4cb8df182e28092eaa2b559a2aabcf2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6952d358dfec2e40b06bc07ec7864908c727e3bc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13c836cae4af79e179f0b9e0c180ecf9f2099ec8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89bdcf544e839b58bf177f62e5bc212671a6bf89 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58b2b0770d22e32feee73053823ee20aaff267b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..811af4a101acf6f7e90648707268ccf1a9ed9ef6 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fdc9483e93b9d7ee3daabb964c650514cc35167 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f39b8330246a86774692c4a6ac42d898eae55e56 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e452b5106d5526491d3afc2dd2e3d3b2298b874 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05804591b09b4b84de5a6f2fe4973002d3da924d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9798c4652608759a1fd297927650d05f9bba17df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bc1236c2293f5e3b231cff6756d178546ac5ad6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..659afcc6ff3d401cfd61f7429958ca180f8f6dee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34e70b767388b76cd27219eee52d00a4e5ed83c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_08c_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..734aa96cd8df2ee52fc92885ce89bd63891e616a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c36b47500e51eda95fad68e7531ebc454248b4bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53d0fab7495fddab5eecfa722325db18e1a6b27e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_08c_other" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d994dd5a8ee97ccd5bc93354083adc8be5a4ff2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac8daaf8b3ccd8613f3fa76d963811f070b8834e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2e5fe42b13314e37bdfde7ef0a1e7a293ff2d3b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ad5a3f7c2bcf2486335aea96b3f28ab49955fca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16994446dcda17daffc5b8a5e64518465e649df4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23e327d1187106a7cac2f39725345127d70fab31 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9515d77d661bdeafc5a38725237af6a84ed87226 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6187d9119312d32275942d123c54b7df4593116 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00a2136cf0b841807b5a123ffe925c1141bca3ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..898eb37e801f02f7c76b20881cd8df9ace968e33 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f00eb9734f28eaa296f06672e102f980f69a490d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..0ec582d609408ae0c1c52df49244988d1aa204a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a06d4e2fdf9d57dbb6c0fa8fee0bf4e6542e62bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b48618f3fbdab2c0051a223acfa9ff4dbb98afd2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b0cfce7c878d3b919d4353fc5053dc024d0d6d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ef51c003d72b0bbab3ec4a16c9453c658f5666a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_default_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_default_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f73aa157fac13f053d95b10443b2270d5dace0e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_default_template_yaml @@ -0,0 +1,13 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}" +doc_to_choice: choices +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - 
metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83bce395836693348a05cf82dcccdb466e94f9fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_pv_01 +task: + - mmlu_alt_pv_01_stem + - mmlu_alt_pv_01_other + - mmlu_alt_pv_01_social_sciences + - mmlu_alt_pv_01_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ecfa80758002a7dc12b27442e426d466ad905e1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae8eb7a5c9af5481d0f6ca2f84cd87a3f4af3f15 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..688106b862970b382019504913c441beca8f6145 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31a0f39a6ad69fd8a66f97d9cbad68f19e33fc18 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b860ba1bac0ddbe1089d8e7221276acf71027a87 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c5464ca33ba22facdd12bd821fee120a1024488 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8938bd03e73c9ddc43af038f6af807e7e5c53419 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f0829a8d00fae65798fb0e8cd492159a2e030f2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e0fde29e58f8d82e3e91c113a1f9f066d94cb964 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98e29dc66a0ac2fd0db470a463e4250eac42f6d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a92494986373e82d8e67af69f6eb99432f07a242 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb127bad6a8555f36a3314c5d30e15ab8adcbb80 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d054a75bbe1a92bfd1ae755763a4949a8d84c91 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa25237de32cef1197abe9cf2e347b954e010c52 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..551c1544ea9a8c87589618477df39b27174a1ab4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8034806d862f22144413bb286284b5d94a0b4c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7517dd5d7d2f4432cfdbf5f4ffc2651ad23bcbd2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15f4a14e10f8b36c732934ae4cfd8e1b8f66b236 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d514e0bd03a7b78cd350c6b456d5c4e266f5071 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d841bda5e4641079ed0d0fcb9f42bfa2ff5029c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b7266fd343a6e61ec2260daa76e6b3e126eaa80 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d19c38745a9c460cad60b226596c6cf8cd278f6e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6693a49eccd2ada88af3866191f4949a15e82fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6f6644d51facc994aa0b8931206745d1e3f4a539 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dff29707e433885db1b506e3763cc511b8ab13df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39ea098459b70a112ac8d03d101b9c0cc3947296 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..327756b1805a83837aaea27ea5347bdee4c58fea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56c5999bafd6b9bc04b9625ee67b463ee8aa4dbb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22c5c6ef084c031b88c522eee39f2d0f74f86473 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a7e279f79bb80fdc120150bfc374a68a6428910 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..223c548abca201b520668378b924003fceb9831d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba3d32d8b8638785017bdfaecb8f98ac5d5e3d47 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..320dcb010a3ec27a992250ee967c52cb25be29bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e21ee56f003e74d0f2053e0c442a5d0830426421 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d1e4e13a45550c483127f491b118c3903fba5c5f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c28b447d60ee262fc97268be710cbcc7861a02e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36d1cb5b43b4298dd92d7653d18b0e973d5647df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a56ea1ba0a665104107bb275a61b027c06645cb8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": 
"_default_template_yaml" +"task": "mmlu_alt_pv_01_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa362939b059884e6d61b2bceede701c5756c272 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..635afd3b68bf81a5d64904f7f9676da859914cd4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac06e3df7a81fd2be5e8a1a19a1b75c040d90669 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": 
"_default_template_yaml" +"task": "mmlu_alt_pv_01_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9dfeadb3d68a5c0dcc2ffa0d98c0f8e6cc4188d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e74db2cb9321e075265f93eb5a7529e04317f0bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0eb99f43b42e4df58e2a96d2a4ad8b9de37386e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a097ddbd16b757d4d81045d8e920407efa8431aa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0b463de9381ee64bb59bb760272a7e78d9b9a59 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..588fbacb7f74abfa32f72dd5ab57fd92555f0746 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d58389bd05eb6433588f5b891da2fb25becf724 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7681cfdd5e7492e525a9212161f8a827a27a841 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d26d9d3bc34c374b21ffc86c8ea6ea4403f7958 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a979cf0957dbe213b7441b2b42c156c74a195f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87a8a946fab59cd111859114c187cf545f51a1db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a89be56e7306b5c92f185d77739a4db3c69da60a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e2bc065410bfb3986fd4510cedb20c2e53785df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02016eb6b1cfc452b3b76772c1778dc988bf2df0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d08227a2ed873a72ff01852dd5db4948281204b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..918db68b7058e89c1012777acb4d00247bf4dc76 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_default_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_default_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..083fb599b26cfe36ac38d00ace4c5208cf2ebd66 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_default_template_yaml @@ -0,0 +1,13 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\nA:" +doc_to_choice: choices +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + 
- metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0ec579b4146fd16887790f56720dbf03fb5dea5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_pv_02 +task: + - mmlu_alt_pv_02_stem + - mmlu_alt_pv_02_other + - mmlu_alt_pv_02_social_sciences + - mmlu_alt_pv_02_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bd0799500c340a8d5addec078a443a91ae9f89d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdc0dbce1efb0e36a382f0191b9cd299b65c3861 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..484cbe3766957721bd3489ab330985f635f55261 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e626b34a3bd2c02ae7986e9ba3993edf95e1d800 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..547e30675ae0cc8e2b13f192aa2a055e94351970 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2127089ab372ea14daa1eede324e46c5e726d774 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75103c15eddecd58d4e220171825e47ca9c44116 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49e60548c1bb5ec9550d75601ec631f313d108d1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c450b9a652c23f47018c32e769a656ad5f84c73a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..695e45beb544ae8cf7b336e1621569cec2653422 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db409508b00dbfd3ccc3aa6f89a07cdd63a3d955 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3bad6297c136ba9d5a2a569c19f5e28c61139803 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfc363ae39689472751c09f2f7b113dbcafc8462 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94aeee690dcec89256bfa7307c2017cf67c801a9 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..189fdc9d105b33ba8797f939843d93b7c94a0b01 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ebbb836feef15fc86f75232449c62a4d4133d26 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7aa10a1277fad1319ea0596fd41bafd9e70884fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..180635a1f1405d701a01e3ca0c0aab6adcb300f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50ea44964eb1dba848521fd195081fe73982eff7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..822799005c06e2009c080e13769a7911c5ef7d11 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d48b5079fe023d251dd0bcf61001312af98b7b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78c6ace625087d046303bfcb1c22799b821feb3f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bfc505c3206b5b0a9e5b7f12720e6c18025714e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4581c806c727a7be16f0474057beb803fd37b1f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ac0d5e1937a54a23b3605fbadd9ca40226712a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ddbd5349ba8d34dcda093269c99cdb40e863bca2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9dddda2fedfaa811d711b1281c6ad9b9ae7c28b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3337914e85fb17b1b06f175f5a908aa2fb33a03c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b32de65e055ccaa6d21c48654d16da967c7656d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9716f9807a8f2395389addc0a2a6e16aff1da2b --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..292b857ef7b8f8e234845aaf3df37ccd3f2e2d69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2449fc17ad0066d5a8f0fff99155be1b21edd052 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7c912ef38d1c8a66926a8dc72da497187c01484 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cc26aaec5c9ea3852827b3842f1a08fb69abeca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55859077f6f1208e0d59550a13fb6e2befa289b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5273a53ea32ea7199d2c11eea709c69f0f224f89 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..822c67dcd3598f873cc6556c46d7b1a7f2c5ce26 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a3bb9d51dce76e58365b8756e8b3b0a9df52082 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": 
"_default_template_yaml" +"task": "mmlu_alt_pv_02_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4af9ba0fbf503e9b567da17cf08b4f07cbc6f11 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a23ba708168dd7cfb58f22f56e4719dfd9755db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c47926728d3f1a5c17a0a94ce4b286a8c392212e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": 
"_default_template_yaml" +"task": "mmlu_alt_pv_02_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f37e75ce3eb29107af354590ee3bcb2e1bb2d642 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0657ff03a58eab651848e1bf232ce741ba6b51ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4774d8175465b7f9e86205aa9464435333e073f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccc9c92278c5c958fbad1de2bef11e882be596c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2daae4eeb6962627334f6cf472bbf0fbbe532784 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba217bbe5948b3cb925a8fec78c206504685bf69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9cbc5f78366708a058c4c892d9d3d6d8f67cdf5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15828b996dc4543f2d2bce955118b45d43d40e99 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6800e9bb7519b2b66509355f9769b2bc97fc9a2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dc609c2885b20e2687254b1f519a0921c05307f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c798918b5eba227e4b8d7a4bbae82655df1dee4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..1b5086bb0b801892e4b3c7269b062599a794eda3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..726bf086b6b0ad853f50422eccbab9bd3a0cb626 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80962e7efc32a2aae1f47c612cd1874c76b1b370 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66cd00c451611c4f746126605645acd6f352369f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92dd159af08ecc539c0c9cd20a1a78322eb7e58d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_default_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_default_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..07f9830438efab8a3e2eb4a72a4472eeab867fac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_default_template_yaml @@ -0,0 +1,13 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "Question: {{question.strip()}}\nAnswer:" +doc_to_choice: choices +doc_to_target: answer +metric_list: + - metric: acc + - 
metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39e434a5d956d65a542cc5efb7057cd8cd9a516b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_pv_03 +task: + - mmlu_alt_pv_03_stem + - mmlu_alt_pv_03_other + - mmlu_alt_pv_03_social_sciences + - mmlu_alt_pv_03_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7941d4a8b47f7e6996e686a9fab405cef6b4763d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d04c32106e7ea1eccb9295ad84e5e047c4f2d75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d31997eb95d2ca554c3c5d7c02257cf1813ba110 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aea729b4fcaca8d688ca624e575f8e0c7aa0f7bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..949c5fe4635675069756dcb6c332ea1195a36041 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2a947f2776bd391772629ba2a09209640c6167d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29e01ad45cee9e95fee0499e43ea58dfe0bd1aa8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae23319af2e9f5231e98fc20bd852fecd5b03476 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" 
+"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b349a40a077b64bd681e8f67df53662f424038f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccc5fd2b44945cdbe27c5475993b3f43cd5f32bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdb1838347f552c38fb5ab96cedcda80800ac3a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2ef889d0d9da1ea276ed5ecbc493e0845308ae5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..215171248dd4356258c306f67c3a7036d5ac40f9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c42e5ca8b12ea3aa910acd24d366173e4441d57a --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3c61ff8c406c824a3a92b47f900c42bd489e8af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b533bbc1d9ed0a6f970271cbb609f4a651be52fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02e4314bb121c5ab1eb4c3a77bc62095f5fabce4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..508ddfeeedb44ecb14f0342f21a7a0503dc20b21 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d12e03e3697edf294cc8dbdb9dc20ce9f53f0546 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7d6d032ccfcd652a6ee99847f4e31c97529991e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d909a678cb7f564d07323d2b16540dd3907eacbf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03c4df82da93f5b3ad82a32fa0da45f9ab1179a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" 
+"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1231d6931dec0541b86b3f4aa4a7553e60b24aa1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bb26aff70de9ff39f4143a9a02f3bf4c40d58e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62db2eafb37102e57dceda8c2fe31254b6c93ee4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a690a27179884b6c35308755a2cd9321e959fa6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e924615db8e04f8c7259dcdad3ddb63f526dfd31 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
microeconomics.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..095d99648f5a8f0cbd50c14b796d0834fc5b4cd3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a09577672b656f1902821a31e0113e53367aa649 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a7ddd5b533594886be37da5146a7d8d1a2b9235 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c6e9d12af8128d7cb5acceeaa9c32e1856270dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77107f320d231b2b423d10199237bd316b9aad20 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9aeaece1043f0551c12bc4af475cb2288367fd09 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0dd688bf3a49fdcf72fcd4b7b302f64c977595d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d284aa6cd0f0b542a46112a31c7331f678b6ab9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43ecb1c68fa75de082a16bc9e2c417a80b813e78 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89dcf27dcb43f6a6c1077a79f440bbfac41a3f03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1131165ec0886514cc83fa03829bda821a5e461c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": 
"_default_template_yaml" +"task": "mmlu_alt_pv_03_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72400f1f07f02e3cc3882ee9d745ede0f439ef0b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52332eed6c38ffa781565f4ad2d6cb555ad9a553 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8efa7e9cbf5534b8287dbca5d0483f8ba7713188 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": 
"_default_template_yaml" +"task": "mmlu_alt_pv_03_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39b9ae770d0dc829ac31e61ca3e8051036cfc988 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb08c77788af9f4bc868a3d4d819022c300bf02c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84cc1692dc18ac209c828fc2690dda6304926086 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ 
scenarios.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5f4a05fe648eb17275614ff73aea7c9fe86d9f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7c3408dcad82345d1f1c8f44a4eed2b51e43882 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20294008b3a89b12be73510f28c260e70bfc1c71 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" 
+"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..13f6cefb058577a4d096d24ccdcfc3bd27b9c002 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..461f5ba1941b7923dbc04dae0594a71ae8c8ae9b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0a90066a76a5723dffa6d35189a1467e9a5d39f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f61d3e40cc8a6d1a78bef4b2e7361eaeb69d88c9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b2e2a976b7250032ee388a3a5aeb92416ffd868 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_security_studies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..3e8896d4203ca1fa05f30e31e14fb74d64c543cc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d808d4ec88699d188702d9602c3982669fc2f1d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3c683198bb2cb8db48f8ebafc5427c9b51f9d3a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_virology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d710576d9358478002de732bb30e9d8b17db4209 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8367f96fba82436751f78d41add4ff8c2a15fef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/styles.py b/lm_eval/tasks/mmlu/alternative_worlds/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..c7a33ea0201f7b94c1e7080263287f9ce7b1f615 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/styles.py @@ -0,0 +1,89 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = doc["choices"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n\n".join( + [ + "Question: " + doc["question"].strip() + "\nAnswer:", + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, 
choices)] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return doc["choices"] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = doc["choices"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = doc["choices"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, 
string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/piqa/alternative_worlds/README.md b/lm_eval/tasks/piqa/alternative_worlds/README.md new file mode 100644 index 0000000000000000000000000000000000000000..93600ae181761370baf6c9fdce7860af5737a978 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/README.md @@ -0,0 +1,20 @@ + + +Investigate affect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. +- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml new file mode 100644 index 0000000000000000000000000000000000000000..d26d2ae34640bc05e4233f51608fde4cd064da8b --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml @@ -0,0 +1,20 @@ +dataset_path: piqa +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_text: "Question: {{goal}}\nAnswer:" +doc_to_target: label +doc_to_choice: "{{[sol1, sol2]}}" +should_decontaminate: true +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false +metadata: + - version: 1.0 diff --git 
a/lm_eval/tasks/piqa/alternative_worlds/output_variation/piqa_alt_ov.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/piqa_alt_ov.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da917426b348139549a07e94b0fd1e5108df24fb --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/piqa_alt_ov.yaml @@ -0,0 +1,10 @@ +group: piqa_alt_ov +task: + - piqa_alt_ov_01 + - piqa_alt_ov_02 + - piqa_alt_ov_03 + - piqa_alt_ov_04 + - piqa_alt_ov_05 + - piqa_alt_ov_06 + - piqa_alt_ov_07 + - piqa_alt_ov_08 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a19793a7b4d8d0f3c3f4ac99f7a0cf0352496531 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_01 +task: piqa_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ae4b304170b14e0ab43145f70577acdbd80a12a --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_01 +task: piqa_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..84cbd654868aa3d49609439646fc684556294657 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_01 +task: piqa_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad48ca783cdbb57205b4326363580860e108fbde --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_02 +task: piqa_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7c2b84a7caa1b0dab74721992dd22f3f6337a73 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_02 +task: piqa_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2f45ff9636701ac0524f41411bd27cfb0b0d2cd --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/c.yaml @@ 
-0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_02 +task: piqa_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70a5ec8c494f151e66a49c12fcb5ca775f44ebae --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_03 +task: piqa_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af73914b171cb1443a87249fab169748bcbbf160 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_03 +task: piqa_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3484705fc00b332e15b7ad36b6bcfe8d47933b34 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_03 +task: piqa_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function 
../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79a364488a17bc21da6c043b1545fc2574a88949 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_04 +task: piqa_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36b3347cfe304f675f9534932c55e9d611e36bc9 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_04 +task: piqa_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e62b4563508e4f2e51a53eb116e792bc70608f4 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_04 +task: piqa_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/a.yaml 
b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54c4a9807fc164b127e5f7336662f858bdada997 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_05 +task: piqa_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2954cc532949fb38239e0f45f7118268c3e5bfc7 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_05 +task: piqa_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b604e1d2462e001a8feed1bc55a21f59765bfb61 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_05 +task: piqa_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..d0bc222dafaa18fd551b4b547c24f4f552632019 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_06 +task: piqa_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..393db4a002eed14a8c72a5ce79d2e593a3438602 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_06 +task: piqa_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2be7bf0d08679b9005aecf9c554b4c371668cbc8 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_06 +task: piqa_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a95f9e8c2d538dca52190f9b2612bb760490da5 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/a.yaml @@ 
-0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_07 +task: piqa_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6bc043299aa48de9676bf90d14e3928bada2854 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_07 +task: piqa_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..113fb52bc161a55de506a27e1a5ee2acb5101e67 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_07 +task: piqa_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f56c485a621a34172db4e2bd596210f1dc81a43 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_08 +task: piqa_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function 
../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4458cd502a2a0c0bf29d86359d729dfd004636b2 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_08 +task: piqa_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..01e7748725b031b8289e658b2471f6da30be4e25 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_08 +task: piqa_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..68447e2d01874105cdf7fd8204b9951568ac3f05 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py @@ -0,0 +1,90 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = [doc["sol1"], doc["sol2"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n".join( + [ + 
"Question: " + doc["goal"], + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return [doc["sol1"], doc["sol2"]] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = [doc["sol1"], doc["sol2"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = [doc["sol1"], doc["sol2"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, 
string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb1580c7984c8dd8a60fc8acc8d06a0ba28d051d --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml @@ -0,0 +1,22 @@ +dataset_path: piqa +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_text: "Question: {{goal}}\nAnswer:" +doc_to_target: label +doc_to_choice: "{{[sol1, sol2]}}" +should_decontaminate: true +doc_to_decontamination_query: goal +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false +metadata: + - version: 1.0 diff --git a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 0000000000000000000000000000000000000000..563c551dd76f67e2123c255d0bb6ebd699715a2c --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: 
piqa_alt_pv +task: piqa_alt_pv_01 +doc_to_text: "{{goal}}" +doc_to_decontamination_query: "{{goal}}" diff --git a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32e1fe7618e1703c3a64395844c45b6b8459004f --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: piqa_alt_pv +task: piqa_alt_pv_02 +doc_to_text: "Q: {{goal}}\nA:" +doc_to_decontamination_query: "Q: {{goal}}\nA:" diff --git a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bb8bfef7c0e647f553eea8c40bc03b58557aff3 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: piqa_alt_pv +task: piqa_alt_pv_03 +doc_to_text: "Question: {{goal}}\nAnswer:" +doc_to_decontamination_query: "Question: {{goal}}\nAnswer:" diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/_sciq_alt_yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/_sciq_alt_yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4241098fd3c7162784720d80a2be5020c1b1877 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/_sciq_alt_yaml @@ -0,0 +1,23 @@ +group: + - ai2_arc +task: sciq +dataset_path: ai2_arc +dataset_name: ARC-Easy +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: 
true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/sciq_alt_ov.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/sciq_alt_ov.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6810b9db54fcc28d53eaa2f70c5819c7eb81fe37 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/sciq_alt_ov.yaml @@ -0,0 +1,10 @@ +group: sciq_alt_ov +task: + - sciq_alt_ov_01 + - sciq_alt_ov_02 + - sciq_alt_ov_03 + - sciq_alt_ov_04 + - sciq_alt_ov_05 + - sciq_alt_ov_06 + - sciq_alt_ov_07 + - sciq_alt_ov_08 diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e57348aad7dae3f9c82d742fa2244ba1fd8895e --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_01 +task: sciq_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6f286803c2de871c5ef361db21b36f6c6ee94ce --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_01 +task: sciq_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..49269ec1d6955ce1c72873d71333ee5698de2ba4 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_01 +task: sciq_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..985a80fa864e55e40cac85cbaa58970e0cd1ff20 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_02 +task: sciq_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64aa22c7b979e392a8beb95eba00268fff5fc800 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_02 +task: sciq_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cf2f59f46975accffae464ba34e4196eea6eb13 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_02 +task: sciq_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c diff --git 
a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c65ccc19df1b7b5f865181b5983dcf8c8fad64c3 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_03 +task: sciq_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fc543416a1749d911e6ea3e6426f97a13ce21e7 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_03 +task: sciq_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ec41f3bb222897cb60993790cfc16c909a135d4 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_03 +task: sciq_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47e929ae4dcb9c5baadd6ae70cc4630fdc9ad2e0 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,5 @@ +include: 
../_sciq_alt_yaml +group: sciq_alt_ov_04 +task: sciq_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..805ffa8f81fe8d7816467a7b5ab8585e6c94dc5d --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_04 +task: sciq_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11749248dcdcdce24ebbe2437890959069f3d0e5 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_04 +task: sciq_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b2e153b895a57cbe08182e4591102cf874ad2c3 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_05 +task: sciq_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..0ae6f1faf85509abb6c1ce6081c32b8792e6f4d1 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_05 +task: sciq_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db4dcbfb5e2a6a32c2fdb564a48d1d49f80f246d --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_05 +task: sciq_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f34ac56305a302969121d2710f0f311499dcbf2 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_06 +task: sciq_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b86dbf9f72558875f06c5e9713e507e9385e64c --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_06 +task: sciq_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b diff --git 
a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f505f780c7dca2e9b74b3e8827eaa88cadfea89 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_06 +task: sciq_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..325ecd9008050bbc00476dfdb0acd68a54b43f81 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_07 +task: sciq_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd50fd65d0b933edcb217f09623c6a088307654c --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_07 +task: sciq_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46f7888617b88ced374607b64aadd82f61afae61 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,5 @@ +include: 
../_sciq_alt_yaml +group: sciq_alt_ov_07 +task: sciq_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d744ce7aa27f99d0bf9e8fa891d39958f573b0e3 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_08 +task: sciq_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..885761d5604a6142315e991b968d69ca78543a45 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_08 +task: sciq_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68e6037bea63fa210a39e025ade9ea5626a9c049 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_08 +task: sciq_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/sciq/alternative_worlds/output_variation/styles.py new file mode 100644 index 
0000000000000000000000000000000000000000..3d075517ccd6d698a35e53e32f3c2eb6ee6f5556 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/styles.py @@ -0,0 +1,89 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n\n".join( + [ + "Question: " + doc["question"] + "\nAnswer:", + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return doc["choices"]["text"] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, 
string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c653b2d950e64e3c0670581506687aab553734f7 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,17 @@ +group: sciq_alt_pv +task: sciq_alt_pv_01 +dataset_path: sciq +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "{{support.lstrip()}}\n{{question}}" +doc_to_target: 3 +doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}" +should_decontaminate: true +doc_to_decontamination_query: "{{support}} {{question}}" 
+metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9498269ac706840fdb0a6ca58c4b13c2de4a69d --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,17 @@ +group: sciq_alt_pv +task: sciq_alt_pv_02 +dataset_path: sciq +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "{{support.lstrip()}}\nQ: {{question}}\nA:" +doc_to_target: 3 +doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}" +should_decontaminate: true +doc_to_decontamination_query: "{{support}} {{question}}" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12049228ad4aded2699c2d7c1f71d912d76a55f6 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,17 @@ +group: sciq_alt_pv +task: sciq_alt_pv_03 +dataset_path: sciq +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:" +doc_to_target: 3 +doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}" +should_decontaminate: true +doc_to_decontamination_query: "{{support}} {{question}}" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/siqa/alternative_worlds/README.md b/lm_eval/tasks/siqa/alternative_worlds/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..93600ae181761370baf6c9fdce7860af5737a978 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. +- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/_siqa_alt_yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/_siqa_alt_yaml new file mode 100644 index 0000000000000000000000000000000000000000..157c7389f1e0d014c3c9fa7452fbf0a85456b2d5 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/_siqa_alt_yaml @@ -0,0 +1,19 @@ +task: social_iqa +dataset_path: social_i_qa +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_choice: ["{{answerA}}", "{{answerB}}", "{{answerC}}"] +doc_to_target: "{{label}}" +should_decontaminate: true +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/siqa_alt.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/siqa_alt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d7079ce2a33a40661b02d42a79ecf8d6e5d7f83 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/siqa_alt.yaml @@ -0,0 +1,10 @@ +group: siqa_alt_ov +task: + - siqa_alt_ov_01 + - siqa_alt_ov_02 + - siqa_alt_ov_03 + - siqa_alt_ov_04 + - siqa_alt_ov_05 + - siqa_alt_ov_06 + - siqa_alt_ov_07 + - siqa_alt_ov_08 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..9d7917405251199ac79855b48e5ebf008ed61257 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_01 +task: siqa_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b942970f818aa99da8f7088b2965b132de15068e --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_01 +task: siqa_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4bb701dbb4e8a8ed80731b64c334c2947d85711 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_01 +task: siqa_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94477b978a0380acacebdee4d09c33b06b54b80f --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,6 
@@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_02 +task: siqa_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a57d7d15e3839a10ec3a32c110fa81f5e8ab9c55 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_02 +task: siqa_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a47424a1fef3a40beba8819e4777f6a1c3b24a9 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_02 +task: siqa_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5eeabdd3333e17d6c17e66f949b80f9474fe7278 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_03 +task: siqa_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a 
+doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b39ea717d01fa5427c3bfe427c56e37fcb7ef93 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_03 +task: siqa_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf97679a1cf506833481e48d29b67dc82ab355ef --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_03 +task: siqa_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..251cf9482d8fae4852bfa32a02b4b41e0a012a4d --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_04 +task: siqa_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/b.yaml 
b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea9619a7b04dde193772e1fec83d88c0f61e2a8e --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_04 +task: siqa_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8cacf98b91e784c9f758123dc69a310e7f7752d8 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_04 +task: siqa_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4753ad0bad5fd38da3789c2ca03560a451c5762a --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_05 +task: siqa_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ae84f2d2e8ed2ba2fbaa06282ebdaf9f3e37c09 --- 
/dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_05 +task: siqa_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd47fb6b24d3b2b57e9e99f1948fe358f491c708 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_05 +task: siqa_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12774974e72f34277170fd23175019145fb32b80 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_06 +task: siqa_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30cbf0ef7a9914db3b472722740f491982260aeb --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_06 +task: siqa_alt_ov_06b 
+doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f309834d9b1334407dfa5a9c3201eb8c0da5d11f --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_06 +task: siqa_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5cc468f92c964a9940a0ca50b452d8e8e9269a9c --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_07 +task: siqa_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7de131964233cba766512c77aef004bb596f670 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_07 +task: siqa_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git 
a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0904d14200f5f6566b4f537167b5bb98826660a --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_07 +task: siqa_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d971d4e550b5479dd32bb632141d24d5d7b5fb3 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_08 +task: siqa_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32151413ee35f9a299a74eae321845f4560c2709 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_08 +task: siqa_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/c.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..10b32b01906332c669ad47435a55f0f32543f082 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_08 +task: siqa_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/siqa/alternative_worlds/output_variation/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..ee263cc5530b797fd5667e0196e0db5ca5a44faf --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/styles.py @@ -0,0 +1,90 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = [doc["answerA"], doc["answerB"], doc["answerC"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n".join( + [ + "Question: " + doc["context"] + " " + doc["question"], + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return [doc["answerA"], doc["answerB"], doc["answerC"]] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = [doc["answerA"], doc["answerB"], doc["answerC"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = [doc["answerA"], doc["answerB"], doc["answerC"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " 
for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/_siqa_alt_yaml 
b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/_siqa_alt_yaml new file mode 100644 index 0000000000000000000000000000000000000000..157c7389f1e0d014c3c9fa7452fbf0a85456b2d5 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/_siqa_alt_yaml @@ -0,0 +1,19 @@ +task: social_iqa +dataset_path: social_i_qa +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_choice: ["{{answerA}}", "{{answerB}}", "{{answerC}}"] +doc_to_target: "{{label}}" +should_decontaminate: true +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81cc8595729bf14f4e9b18dcb525416b70403414 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _siqa_alt_yaml +group: siqa_alt_pv +task: siqa_alt_pv_01 +doc_to_text: "{{context}} {{question}}" +doc_to_decontamination_query: "{{context}} {{question}}" diff --git a/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 0000000000000000000000000000000000000000..287bd5fdaea1fff85302ebc2c0ac99662639077c --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _siqa_alt_yaml +group: siqa_alt_pv +task: siqa_alt_pv_02 +doc_to_text: "Q: {{context}} {{question}}\nA:" +doc_to_decontamination_query: "Q: {{context}} {{question}}\nA:" diff --git a/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_03.yaml 
b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76d62c4c3910309eecbc5e7a6e86743308c2b865 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _siqa_alt_yaml +group: siqa_alt_pv +task: siqa_alt_pv_03 +doc_to_text: "Question: {{context}} {{question}}\nAnswer:" +doc_to_decontamination_query: "Question: {{context}} {{question}}\nAnswer:" diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/README.md b/lm_eval/tasks/super_glue/boolq/alternative_worlds/README.md new file mode 100644 index 0000000000000000000000000000000000000000..93600ae181761370baf6c9fdce7860af5737a978 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. +- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/_boolq_alt_ov_yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/_boolq_alt_ov_yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2daf2140e1482104ebb2b5bb669287166d91727 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/_boolq_alt_ov_yaml @@ -0,0 +1,22 @@ +dataset_path: super_glue +dataset_name: boolq +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" +doc_to_target: label +doc_to_choice: ["no", "yes"] +should_decontaminate: true +doc_to_decontamination_query: passage +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + 
higher_is_better: false +metadata: + - version: 1.0 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/boolq_ov.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/boolq_ov.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a887cf174ab96a941baef13ce37318853d33d4a5 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/boolq_ov.yaml @@ -0,0 +1,10 @@ +group: boolq_alt_ov +task: + - boolq_alt_ov_01 + - boolq_alt_ov_02 + - boolq_alt_ov_03 + - boolq_alt_ov_04 + - boolq_alt_ov_05 + - boolq_alt_ov_06 + - boolq_alt_ov_07 + - boolq_alt_ov_08 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be017f32a58ba68ba2c111bcdb45880de5c2ff7b --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_01 +task: boolq_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8c498a288ac32771ce23834c273fdf9c6b1b551 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_01 +task: boolq_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..886a759726f6e5ad652364f90da139fb694d0f73 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_01 +task: boolq_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..265755501a07e04ed8b0dcab3e4f34d9da8ef964 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_02 +task: boolq_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8cf9122edc12b2de802fe0c3d495e89a012c0321 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_02 +task: boolq_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..336f9e99000a5bd626c99ed4b7685afec58a0b6b --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_02 +task: boolq_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3254a128dd496a2142c3beb7cc6749f6a95ee488 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_03 +task: boolq_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..683b9c3c93ea04d6aa07102ebddff6b7a9055cac --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_03 +task: boolq_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..63fbec397f3cc6f30de56a58203ab1da56db9f26 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_03 +task: boolq_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7a88f2e0a9916f2e638069ab5c45c2c3c63cb80 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_04 +task: boolq_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dddc7468897b7837d11809c6f8355527eaeb4ab1 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_04 +task: boolq_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce0c4aac2b3e31a4923b2837143a2078a33d05e9 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_04 +task: boolq_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42629c27a67ba0e08abcdcc23e665a1235ae9a73 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_05 +task: boolq_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e231f53ee22b75b3c595882993e56f6bbaae27f --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_05 +task: boolq_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44594c911e638ad30fb9ebb456c163d2364948d3 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_05 +task: boolq_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f3efdf441782f669d047ae865a78c39b47c2298 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_06 +task: boolq_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6602bad3353052931d1d1bcca1ee84f15dd4c409 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_06 +task: boolq_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bc23a3f7b9ee403c61a9f10019b71d7ccee1e30 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_06 +task: boolq_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b1aa481678b1b5117e68205ed808aed0f759e91 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_07 +task: boolq_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75fe17ef93ca3e4f435d73d27f28a6b588625df1 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_07 +task: boolq_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92d5b26473e517d58315475257a3cb60769c93d7 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_07 +task: boolq_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d5c29ea0f670fc593b592f8ec140e25e02148de --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_08 +task: boolq_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b7a0967415cd73fd75fe357bf575b20bc6274a6 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_08 +task: boolq_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f84b105aeaffff593381cc027d780d6f7dea10d7 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_08 +task: boolq_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/styles.py new file mode 100644 index 0000000000000000000000000000000000000000..6dd5205927d70a34ac769a3009b7e97cdaec1291 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/styles.py @@ -0,0 +1,91 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = ["no", "yes"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n".join( + [doc["passage"]] + + [ + "Question: " + doc["question"], + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return ["no", "yes"] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = ["no", "yes"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = ["no", "yes"] + 
num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, 
string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/_boolq_yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/_boolq_yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2daf2140e1482104ebb2b5bb669287166d91727 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/_boolq_yaml @@ -0,0 +1,22 @@ +dataset_path: super_glue +dataset_name: boolq +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" +doc_to_target: label +doc_to_choice: ["no", "yes"] +should_decontaminate: true +doc_to_decontamination_query: passage +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false +metadata: + - version: 1.0 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b8d1960328fd6c8d285ad1af4a2dd003e6d16a5 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _boolq_yaml +group: boolq_alt_pv +task: boolq_alt_pv_01 +doc_to_text: "{{passage}}\n{{question}}?" +doc_to_decontamination_query: "{{passage}}\n{{question}}?" 
diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1cd464f1ddc19d60d0d47320dc887a4feab6ed37 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _boolq_yaml +group: boolq_alt_pv +task: boolq_alt_pv_02 +doc_to_text: "{{passage}}\nQ: {{question}}?\nA:" +doc_to_decontamination_query: "{{passage}}\nQ: {{question}}?\nA:" diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e36afd8adaf072370a4e5e094d452a3450d3fa6 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _boolq_yaml +group: boolq_alt_pv +task: boolq_alt_pv_03 +doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" +doc_to_decontamination_query: "{{passage}}\nQuestion: {{question}}?\nAnswer:" diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc1.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e6f00bbb4db44aa0efabd44b02c4bc6e578af88 --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc1.yaml @@ -0,0 +1,17 @@ +task: truthfulqa_mc1_alt_pv_01 +dataset_path: truthful_qa +dataset_name: multiple_choice +output_type: multiple_choice +training_split: null +validation_split: validation +test_split: null +num_fewshot: 0 +doc_to_text: "{{question}}" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question 
+metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc2.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93b27828f6f6cedaf2b8023b7e6b41775fec4540 --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc2.yaml @@ -0,0 +1,11 @@ +include: truthfulqa_mc1.yaml +task: truthfulqa_mc2_alt_pv_01 +doc_to_target: 0 +doc_to_choice: "{{mc2_targets.choices}}" +process_results: !function ../../../utils.process_results_mc2 +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc1.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f78ceebec32b1aa850b2c3a0b5d3404f8f56f1f --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc1.yaml @@ -0,0 +1,17 @@ +task: truthfulqa_mc1_alt_pv_02 +dataset_path: truthful_qa +dataset_name: multiple_choice +output_type: multiple_choice +training_split: null +validation_split: validation +test_split: null +num_fewshot: 0 +doc_to_text: "Q: {{question}}\nA:" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc2.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc2.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..fe4535bc1abf45e33c6d02445f238c2b8f8cc1fd --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc2.yaml @@ -0,0 +1,11 @@ +include: truthfulqa_mc1.yaml +task: truthfulqa_mc2_alt_pv_02 +doc_to_target: 0 +doc_to_choice: "{{mc2_targets.choices}}" +process_results: !function ../../../utils.process_results_mc2 +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc1.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6417d98f7dc895bb821942210cbd0384e5fb883 --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc1.yaml @@ -0,0 +1,17 @@ +task: truthfulqa_mc1_alt_pv_03 +dataset_path: truthful_qa +dataset_name: multiple_choice +output_type: multiple_choice +training_split: null +validation_split: validation +test_split: null +num_fewshot: 0 +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc2.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1862aa83676cc95eb6593de97e6d74fecfa19fed --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc2.yaml @@ -0,0 +1,11 @@ +include: truthfulqa_mc1.yaml +task: truthfulqa_mc2_alt_pv_03 +doc_to_target: 0 +doc_to_choice: "{{mc2_targets.choices}}" 
+process_results: !function ../../../utils.process_results_mc2 +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc1.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac04860c9d1fbb76f00eee68248d7cb8dfaaf2cd --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc1.yaml @@ -0,0 +1,19 @@ +group: + - truthfulqa +task: truthfulqa_mc1 +dataset_path: truthful_qa +dataset_name: multiple_choice +output_type: multiple_choice +training_split: null +validation_split: validation +test_split: null +num_fewshot: 0 +doc_to_text: "{{question}}" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc2.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b09c7d83019572822909c5fe88c99810f700f61 --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc2.yaml @@ -0,0 +1,11 @@ +include: truthfulqa_mc1.yaml +task: truthfulqa_mc2 +doc_to_target: 0 +doc_to_choice: "{{mc2_targets.choices}}" +process_results: !function utils.process_results_mc2 +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/utils.py b/lm_eval/utils.py index f0ec330e02e72575bcee6fe1efd0dd1110b65e3e..78ae63ea8c7e997615083c2d8ec70c72e4d62cbe 100644 --- a/lm_eval/utils.py +++ b/lm_eval/utils.py @@ -28,6 +28,8 @@ from typing import ( import torch import transformers +import numpy as np + import yaml from jinja2 import 
BaseLoader, Environment, StrictUndefined @@ -178,6 +180,12 @@ def pattern_match(patterns, source_list): return sorted(list(task_names)) +def softmax(x): + """Compute softmax values for each set of scores in x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum() + + def general_detokenize(string): string = string.replace(" n't", "n't") string = string.replace(" )", ")")