Commit 98c5411c authored by haileyschoelkopf
Browse files

remove template_aliases from existing tasks

parent 2bd45fd9
...@@ -65,7 +65,7 @@ class TaskConfig(dict): ...@@ -65,7 +65,7 @@ class TaskConfig(dict):
fewshot_split: str = None # TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?) fewshot_split: str = None # TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?)
# formatting / prompting options. # formatting / prompting options.
# see docs/advanced_task_guide.md for more info # see docs/advanced_task_guide.md for more info
template_aliases: Union[str, list] = None process_docs: Callable = None
doc_to_text: Union[Callable, str] = None doc_to_text: Union[Callable, str] = None
doc_to_target: Union[Callable, str] = None doc_to_target: Union[Callable, str] = None
doc_to_choice: Union[Callable, str, dict, list] = None doc_to_choice: Union[Callable, str, dict, list] = None
...@@ -91,15 +91,15 @@ class TaskConfig(dict): ...@@ -91,15 +91,15 @@ class TaskConfig(dict):
# allow user-specified aliases so that users can # allow user-specified aliases so that users can
# force prompt-compatibility for some prompt regardless of # force prompt-compatibility for some prompt regardless of
# field names in prompt # field names in prompt
if self.template_aliases: # if self.template_aliases:
if type(self.doc_to_text) == str: # if type(self.doc_to_text) == str:
self.doc_to_text = self.template_aliases + self.doc_to_text # self.doc_to_text = self.template_aliases + self.doc_to_text
if type(self.doc_to_target) == str: # if type(self.doc_to_target) == str:
self.doc_to_target = self.template_aliases + self.doc_to_target # self.doc_to_target = self.template_aliases + self.doc_to_target
if type(self.gold_alias) == str: # if type(self.gold_alias) == str:
self.gold_alias = self.template_aliases + self.gold_alias # self.gold_alias = self.template_aliases + self.gold_alias
if self.generation_kwargs is not None: if self.generation_kwargs is not None:
if self.output_type != "greedy_until": if self.output_type != "greedy_until":
...@@ -619,9 +619,9 @@ class ConfigurableTask(Task): ...@@ -619,9 +619,9 @@ class ConfigurableTask(Task):
list(self.fewshot_docs()), self, rnd=random.Random(1234) list(self.fewshot_docs()), self, rnd=random.Random(1234)
) )
if self._config.template_aliases is not None: # if self._config.template_aliases is not None:
for key, alias in self._config.template_aliases: # for key, alias in self._config.template_aliases:
self.dataset.rename_column(key, alias) # self.dataset.rename_column(key, alias)
if self.has_test_docs(): if self.has_test_docs():
docs = self.test_docs() docs = self.test_docs()
...@@ -680,15 +680,25 @@ class ConfigurableTask(Task): ...@@ -680,15 +680,25 @@ class ConfigurableTask(Task):
return False return False
def training_docs(self): def training_docs(self):
if self._config.training_split is not None: if self.has_training_docs():
if self._config.process_docs:
return self._config.process_docs(
self.dataset[self._config.training_split]
)
return self.dataset[self._config.training_split] return self.dataset[self._config.training_split]
def validation_docs(self): def validation_docs(self):
if self._config.validation_split is not None: if self.has_validation_docs():
if self._config.process_docs:
return self._config.process_docs(
self.dataset[self._config.validation_split]
)
return self.dataset[self._config.validation_split] return self.dataset[self._config.validation_split]
def test_docs(self): def test_docs(self):
if self._config.test_split is not None: if self.has_test_docs():
if self._config.process_docs:
return self._config.process_docs(self.dataset[self._config.test_split])
return self.dataset[self._config.test_split] return self.dataset[self._config.test_split]
def fewshot_docs(self): def fewshot_docs(self):
......
...@@ -6,7 +6,6 @@ dataset_name: arithmetic_1dc ...@@ -6,7 +6,6 @@ dataset_name: arithmetic_1dc
output_type: loglikelihood output_type: loglikelihood
validation_split: validation validation_split: validation
test_split: null test_split: null
template_aliases: ""
doc_to_text: "{{context}}" doc_to_text: "{{context}}"
doc_to_target: "{{completion}}" doc_to_target: "{{completion}}"
metric_list: metric_list:
......
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_2da task: arithmetic_2da
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_2da dataset_name: arithmetic_2da
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_2dm task: arithmetic_2dm
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_2dm dataset_name: arithmetic_2dm
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_2ds task: arithmetic_2ds
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_2ds dataset_name: arithmetic_2ds
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_3da task: arithmetic_3da
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_3da dataset_name: arithmetic_3da
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_3ds task: arithmetic_3ds
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_3ds dataset_name: arithmetic_3ds
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_4da task: arithmetic_4da
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_4da dataset_name: arithmetic_4da
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_4ds task: arithmetic_4ds
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_4ds dataset_name: arithmetic_4ds
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_5da task: arithmetic_5da
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_5da dataset_name: arithmetic_5da
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: include: arithmetic_1dc.yaml
- arithmetic
task: arithmetic_5ds task: arithmetic_5ds
dataset_path: EleutherAI/arithmetic
dataset_name: arithmetic_5ds dataset_name: arithmetic_5ds
output_type: loglikelihood
validation_split: validation
test_split: null
template_aliases: ""
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
group: group:
- hendrycks_ethics - hendrycks_ethics
task: ethics_cm task: ethics_cm
dataset_path: hails/hendrycks_ethics dataset_path: EleutherAI/hendrycks_ethics
dataset_name: commonsense dataset_name: commonsense
output_type: multiple_choice output_type: multiple_choice
training_split: train training_split: train
......
include: commonsense.yaml include: commonsense.yaml
task: ethics_deontology task: ethics_deontology
dataset_path: hails/hendrycks_ethics
dataset_name: deontology dataset_name: deontology
doc_to_text: "Question: Would most people believe this reasonable or unreasonable to say? \"{{scenario}} {{excuse.rstrip()}}\"\nAnswer:" doc_to_text: "Question: Would most people believe this reasonable or unreasonable to say? \"{{scenario}} {{excuse.rstrip()}}\"\nAnswer:"
doc_to_target: label doc_to_target: label
......
...@@ -3,6 +3,5 @@ group: ...@@ -3,6 +3,5 @@ group:
- hendrycks_ethics - hendrycks_ethics
task: ethics_justice task: ethics_justice
dataset_name: justice dataset_name: justice
output_type: multiple_choice
doc_to_text: "Question: Would most people believe this reasonable or unreasonable to say? \"{{scenario}}\"\nAnswer:" doc_to_text: "Question: Would most people believe this reasonable or unreasonable to say? \"{{scenario}}\"\nAnswer:"
# TODO: impl. exact match for this and deontology # TODO: impl. exact match for this and deontology
...@@ -2,11 +2,7 @@ include: commonsense.yaml ...@@ -2,11 +2,7 @@ include: commonsense.yaml
group: group:
- hendrycks_ethics - hendrycks_ethics
task: ethics_utilitarianism task: ethics_utilitarianism
dataset_path: hails/hendrycks_ethics
dataset_name: utilitarianism dataset_name: utilitarianism
output_type: multiple_choice
training_split: train
test_split: test
doc_to_text: !function utils.doc_to_text doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target doc_to_target: !function utils.doc_to_target
doc_to_choice: ['no', 'yes'] doc_to_choice: ['no', 'yes']
......
...@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai ...@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai
dataset_name: default dataset_name: default
output_type: loglikelihood output_type: loglikelihood
test_split: test test_split: test
template_aliases: ""
doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}" doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}"
doc_to_target: "{{' '+text.split(' ')[-1]}}" doc_to_target: "{{' '+text.split(' ')[-1]}}"
should_decontaminate: true should_decontaminate: true
......
...@@ -8,7 +8,6 @@ dataset_name: null ...@@ -8,7 +8,6 @@ dataset_name: null
output_type: loglikelihood output_type: loglikelihood
validation_split: validation validation_split: validation
test_split: test test_split: test
template_aliases: ""
doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}" doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}"
doc_to_target: "{{' '+text.split(' ')[-1]}}" doc_to_target: "{{' '+text.split(' ')[-1]}}"
should_decontaminate: true should_decontaminate: true
......
...@@ -6,7 +6,6 @@ dataset_path: EleutherAI/lambada_openai ...@@ -6,7 +6,6 @@ dataset_path: EleutherAI/lambada_openai
dataset_name: default dataset_name: default
output_type: loglikelihood output_type: loglikelihood
test_split: test test_split: test
template_aliases: ""
doc_to_text: "{{text.split(' ')[:-1]|join(' ')}} ____. ->" doc_to_text: "{{text.split(' ')[:-1]|join(' ')}} ____. ->"
doc_to_target: "{{' '+text.split(' ')[-1]}}" doc_to_target: "{{' '+text.split(' ')[-1]}}"
should_decontaminate: true should_decontaminate: true
......
...@@ -7,7 +7,6 @@ dataset_name: null ...@@ -7,7 +7,6 @@ dataset_name: null
output_type: loglikelihood output_type: loglikelihood
validation_split: validation validation_split: validation
test_split: test test_split: test
template_aliases: ""
doc_to_text: "{{text.split(' ')[:-1]|join(' ')}} ____. ->" doc_to_text: "{{text.split(' ')[:-1]|join(' ')}} ____. ->"
doc_to_target: "{{' '+text.split(' ')[-1]}}" doc_to_target: "{{' '+text.split(' ')[-1]}}"
should_decontaminate: true should_decontaminate: true
......
...@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai ...@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai
dataset_name: en dataset_name: en
output_type: loglikelihood output_type: loglikelihood
test_split: test test_split: test
template_aliases: ""
doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}" doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}"
doc_to_target: "{{' '+text.split(' ')[-1]}}" doc_to_target: "{{' '+text.split(' ')[-1]}}"
should_decontaminate: true should_decontaminate: true
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment