Unverified Commit dc544beb authored by Lintang Sutawika's avatar Lintang Sutawika Committed by GitHub
Browse files

Merge pull request #813 from EleutherAI/asdiv

[Refactor] Asdiv
parents 084a9119 25a667f5
......@@ -90,6 +90,12 @@ class TaskConfig(dict):
def __post_init__(self):
if "." in self.dataset_path:
import inspect
from importlib import import_module
self.dataset_path = inspect.getfile(import_module(self.dataset_path))
if self.generation_kwargs is not None:
if self.output_type != "greedy_until":
eval_logger.warning(
......@@ -792,7 +798,7 @@ class ConfigurableTask(Task):
return doc[doc_to_text]
else:
text_string = utils.apply_template(doc_to_text, doc)
if text_string.isdigit():
if text_string.isdigit() and self._config.doc_to_choice is not None:
return ast.literal_eval(text_string)
else:
return text_string
......@@ -827,7 +833,7 @@ class ConfigurableTask(Task):
return doc[doc_to_target]
else:
target_string = utils.apply_template(doc_to_target, doc)
if target_string.isdigit():
if target_string.isdigit() and self._config.doc_to_choice is not None:
return ast.literal_eval(target_string)
elif (
len(target_string) >= 2
......
......@@ -38,7 +38,7 @@ Boxes should be checked iff tasks are implemented in the refactor and tested for
- [x] TruthfulQA (gen)
- [ ] MuTual
- [ ] Hendrycks Math (Hailey)
- [ ] Asdiv
- [x] Asdiv
- [ ] GSM8k
- [x] Arithmetic
- [ ] MMMLU (Hailey)
......
task: asdiv
dataset_path: EleutherAI/asdiv
output_type: loglikelihood
validation_split: validation
doc_to_text: "{{body}}\nQuestion:{{question}}\nAnswer:"
doc_to_target: "{{answer.split(' (')[0]}}"
should_decontaminate: true
doc_to_decontamination_query: "{{body}} {{question}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment