Remove template_aliases from existing tasks; add process_docs hook to TaskConfig

98c5411c · haileyschoelkopf · 2bd45fd9
Commit 98c5411c authored Jul 18, 2023 by haileyschoelkopf
20 changed files
--- a/lm_eval/api/task.py
+++ b/lm_eval/api/task.py
@@ -65,7 +65,7 @@ class TaskConfig(dict):
    fewshot_split: str = None  # TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?)
    # formatting / prompting options.
    # see docs/advanced_task_guide.md for more info
-    template_aliases: Union[str, list] = None
+    process_docs: Callable = None
    doc_to_text: Union[Callable, str] = None
    doc_to_target: Union[Callable, str] = None
    doc_to_choice: Union[Callable, str, dict, list] = None
@@ -91,15 +91,15 @@ class TaskConfig(dict):
        # allow user-specified aliases so that users can
        # force prompt-compatibility for some prompt regardless of
        # field names in prompt
-        if self.template_aliases:
+        # if self.template_aliases:
-            if type(self.doc_to_text) == str:
+        #     if type(self.doc_to_text) == str:
-                self.doc_to_text = self.template_aliases + self.doc_to_text
+        #         self.doc_to_text = self.template_aliases + self.doc_to_text
-            if type(self.doc_to_target) == str:
+        #     if type(self.doc_to_target) == str:
-                self.doc_to_target = self.template_aliases + self.doc_to_target
+        #         self.doc_to_target = self.template_aliases + self.doc_to_target
-            if type(self.gold_alias) == str:
+        #     if type(self.gold_alias) == str:
-                self.gold_alias = self.template_aliases + self.gold_alias
+        #         self.gold_alias = self.template_aliases + self.gold_alias
        if self.generation_kwargs is not None:
            if self.output_type != "greedy_until":
@@ -619,9 +619,9 @@ class ConfigurableTask(Task):
                list(self.fewshot_docs()), self, rnd=random.Random(1234)
            )
-        if self._config.template_aliases is not None:
+        # if self._config.template_aliases is not None:
-            for key, alias in self._config.template_aliases:
+        #     for key, alias in self._config.template_aliases:
-                self.dataset.rename_column(key, alias)
+        #         self.dataset.rename_column(key, alias)
        if self.has_test_docs():
            docs = self.test_docs()
@@ -680,15 +680,25 @@ class ConfigurableTask(Task):
            return False
    def training_docs(self):
-        if self._config.training_split is not None:
+        if self.has_training_docs():
+            if self._config.process_docs:
+                return self._config.process_docs(
+                    self.dataset[self._config.training_split]
+                )
            return self.dataset[self._config.training_split]
    def validation_docs(self):
-        if self._config.validation_split is not None:
+        if self.has_validation_docs():
+            if self._config.process_docs:
+                return self._config.process_docs(
+                    self.dataset[self._config.validation_split]
+                )
            return self.dataset[self._config.validation_split]
    def test_docs(self):
-        if self._config.test_split is not None:
+        if self.has_test_docs():
+            if self._config.process_docs:
+                return self._config.process_docs(self.dataset[self._config.test_split])
            return self.dataset[self._config.test_split]
    def fewshot_docs(self):

--- a/lm_eval/tasks/arithmetic/arithmetic_1dc.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_1dc.yaml
@@ -6,7 +6,6 @@ dataset_name: arithmetic_1dc
 output_type: loglikelihood
 validation_split: validation
 test_split: null
-template_aliases: ""
 doc_to_text: "{{context}}"
 doc_to_target: "{{completion}}"
 metric_list:

--- a/lm_eval/tasks/arithmetic/arithmetic_2da.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_2da.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_2da
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_2da
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/arithmetic/arithmetic_2dm.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_2dm.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_2dm
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_2dm
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/arithmetic/arithmetic_2ds.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_2ds.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_2ds
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_2ds
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/arithmetic/arithmetic_3da.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_3da.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_3da
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_3da
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/arithmetic/arithmetic_3ds.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_3ds.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_3ds
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_3ds
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/arithmetic/arithmetic_4da.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_4da.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_4da
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_4da
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/arithmetic/arithmetic_4ds.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_4ds.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_4ds
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_4ds
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/arithmetic/arithmetic_5da.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_5da.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_5da
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_5da
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/arithmetic/arithmetic_5ds.yaml
+++ b/lm_eval/tasks/arithmetic/arithmetic_5ds.yaml
-group:
+include: arithmetic_1dc.yaml
-  - arithmetic
 task: arithmetic_5ds
-dataset_path: EleutherAI/arithmetic
 dataset_name: arithmetic_5ds
-output_type: loglikelihood
-validation_split: validation
-test_split: null
-template_aliases: ""
-doc_to_text: "{{context}}"
-doc_to_target: "{{completion}}"
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/hendrycks_ethics/commonsense.yaml
+++ b/lm_eval/tasks/hendrycks_ethics/commonsense.yaml
 group:
  - hendrycks_ethics
 task: ethics_cm
-dataset_path: hails/hendrycks_ethics
+dataset_path: EleutherAI/hendrycks_ethics
 dataset_name: commonsense
 output_type: multiple_choice
 training_split: train

--- a/lm_eval/tasks/hendrycks_ethics/deontology.yaml
+++ b/lm_eval/tasks/hendrycks_ethics/deontology.yaml
 include: commonsense.yaml
 task: ethics_deontology
-dataset_path: hails/hendrycks_ethics
 dataset_name: deontology
 doc_to_text: "Question: Would most people believe this reasonable or unreasonable to say? \"{{scenario}} {{excuse.rstrip()}}\"\nAnswer:"
 doc_to_target: label

--- a/lm_eval/tasks/hendrycks_ethics/justice.yaml
+++ b/lm_eval/tasks/hendrycks_ethics/justice.yaml
@@ -3,6 +3,5 @@ group:
  - hendrycks_ethics
 task: ethics_justice
 dataset_name: justice
-output_type: multiple_choice
 doc_to_text: "Question: Would most people believe this reasonable or unreasonable to say? \"{{scenario}}\"\nAnswer:"
 # TODO: impl. exact match for this and deontology
--- a/lm_eval/tasks/hendrycks_ethics/utilitarianism.yaml
+++ b/lm_eval/tasks/hendrycks_ethics/utilitarianism.yaml
@@ -2,11 +2,7 @@ include: commonsense.yaml
 group:
  - hendrycks_ethics
 task: ethics_utilitarianism
-dataset_path: hails/hendrycks_ethics
 dataset_name: utilitarianism
-output_type: multiple_choice
-training_split: train
-test_split: test
 doc_to_text: !function utils.doc_to_text
 doc_to_target: !function utils.doc_to_target
 doc_to_choice: ['no', 'yes']

--- a/lm_eval/tasks/lambada/lambada_openai.yaml
+++ b/lm_eval/tasks/lambada/lambada_openai.yaml
@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai
 dataset_name: default
 output_type: loglikelihood
 test_split: test
-template_aliases: ""
 doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}"
 doc_to_target: "{{' '+text.split(' ')[-1]}}"
 should_decontaminate: true

--- a/lm_eval/tasks/lambada/lambada_standard.yaml
+++ b/lm_eval/tasks/lambada/lambada_standard.yaml
@@ -8,7 +8,6 @@ dataset_name: null
 output_type: loglikelihood
 validation_split: validation
 test_split: test
-template_aliases: ""
 doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}"
 doc_to_target: "{{' '+text.split(' ')[-1]}}"
 should_decontaminate: true

--- a/lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml
+++ b/lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml
@@ -6,7 +6,6 @@ dataset_path: EleutherAI/lambada_openai
 dataset_name: default
 output_type: loglikelihood
 test_split: test
-template_aliases: ""
 doc_to_text: "{{text.split(' ')[:-1]|join(' ')}} ____. ->"
 doc_to_target: "{{' '+text.split(' ')[-1]}}"
 should_decontaminate: true

--- a/lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml
+++ b/lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml
@@ -7,7 +7,6 @@ dataset_name: null
 output_type: loglikelihood
 validation_split: validation
 test_split: test
-template_aliases: ""
 doc_to_text: "{{text.split(' ')[:-1]|join(' ')}} ____. ->"
 doc_to_target: "{{' '+text.split(' ')[-1]}}"
 should_decontaminate: true

--- a/lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml
+++ b/lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml
@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai
 dataset_name: en
 output_type: loglikelihood
 test_split: test
-template_aliases: ""
 doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}"
 doc_to_target: "{{' '+text.split(' ')[-1]}}"
 should_decontaminate: true