"vscode:/vscode.git/clone" did not exist on "32d1a00017cd2219173945cd56e6409279767d7e"
Unverified commit 801322e0, authored by Steven Basart, committed by GitHub

Fixes scrolls task bug with few_shot examples (#2003)

Bug:

```
python -m scripts.write_out --task scrolls_quality --output_base_path ~/workspace/
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/lm-evaluation-harness/scripts/write_out.py", line 92, in <module>
    main()
  File "/lm-evaluation-harness/scripts/write_out.py", line 51, in main
    task_dict = tasks.get_task_dict(task_names, task_manager)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/lm-evaluation-harness/lm_eval/tasks/__init__.py", line 423, in get_task_dict
    task_name_from_string_dict = task_manager.load_task_or_group(
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/lm-evaluation-harness/lm_eval/tasks/__init__.py", line 271, in load_task_or_group
    collections.ChainMap(*map(self._load_individual_task_or_group, task_list))
  File "/lm-evaluation-harness/lm_eval/tasks/__init__.py", line 162, in _load_individual_task_or_group
    return load_task(task_config, task=name_or_config, group=parent_name)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/lm-evaluation-harness/lm_eval/tasks/__init__.py", line 148, in load_task
    task_object = config["class"]()
                  ^^^^^^^^^^^^^^^^^
  File "/lm-evaluation-harness/lm_eval/tasks/scrolls/task.py", line 120, in __init__
    super().__init__()
  File "/lm-evaluation-harness/lm_eval/api/task.py", line 703, in __init__
    self._config = TaskConfig(**config)
                   ^^^^^^^^^^^^^^^^^^^^
TypeError: lm_eval.api.task.TaskConfig() argument after ** must be a mapping, not NoneType
```
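The `TypeError` at the bottom of the traceback is Python refusing to unpack `None` with `**`: `super().__init__()` reaches `TaskConfig(**config)` while `config` is still `None`. A minimal, self-contained illustration of that failure mode (the `TaskConfig` stand-in below is hypothetical, not the class from `lm_eval.api.task`):

```python
from dataclasses import dataclass


@dataclass
class TaskConfig:  # hypothetical stand-in for lm_eval.api.task.TaskConfig
    metadata: dict = None


config = None
# Raises: TypeError: TaskConfig() argument after ** must be a mapping, not NoneType
TaskConfig(**config)
```

The fix below makes `_SCROLLSTask` inherit from `ConfigurableTask` and pass an explicit config mapping, and rewrites `training_docs`/`validation_docs` to return `datasets.Dataset` objects, which plays better with few-shot example sampling than a generator.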
Parent commit: e5e5ee0c
Diff of `lm_eval/tasks/scrolls/task.py`:

```diff
@@ -4,12 +4,12 @@ from functools import reduce
 import numpy as np
 import transformers.data.metrics.squad_metrics as squad_metrics
-from datasets import load_metric
+from datasets import Dataset, load_metric
 from transformers import AutoTokenizer
 
 from lm_eval.api.instance import Instance
 from lm_eval.api.metrics import mean
-from lm_eval.api.task import Task
+from lm_eval.api.task import ConfigurableTask
 
 _CITATION = """
@@ -108,7 +108,7 @@ def _num_cpu_cores():
     return len(os.sched_getaffinity(0))
 
-class _SCROLLSTask(Task):
+class _SCROLLSTask(ConfigurableTask):
     VERSION = 2
     DATASET_PATH = "tau/scrolls"
     DATASET_NAME = None
@@ -117,7 +117,7 @@ class _SCROLLSTask(Task):
     PRUNE_NUM_PROC = None
 
     def __init__(self):
-        super().__init__()
+        super().__init__(config={"metadata": {"version": self.VERSION}})
         if self.DATASET_NAME is not None:
             self.metric = load_metric(_download_metric(), config_name=self.DATASET_NAME)
@@ -131,12 +131,26 @@ class _SCROLLSTask(Task):
         return False
 
     def training_docs(self):
-        for doc in self.dataset["train"]:
-            yield from self._process_doc(doc)
+        processed_docs = list(map(self._process_doc, self.dataset["train"]))
+        # Flatten the list of lists since _process_doc returns a list of one element.
+        processed_docs = [item for sublist in processed_docs for item in sublist]
+        processed_dict = {
+            key: [d[key] for d in processed_docs] for key in processed_docs[0]
+        }
+        return Dataset.from_dict(processed_dict)
 
     def validation_docs(self):
-        for doc in self.dataset["validation"]:
-            yield from self._process_doc(doc)
+        processed_docs = list(map(self._process_doc, self.dataset["validation"]))
+        # Flatten the list of lists since _process_doc returns a list of one element.
+        processed_docs = [item for sublist in processed_docs for item in sublist]
+        processed_dict = {
+            key: [d[key] for d in processed_docs] for key in processed_docs[0]
+        }
+        return Dataset.from_dict(processed_dict)
 
     def should_decontaminate(self):
         return True
```
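The `training_docs`/`validation_docs` rewrite converts the row-oriented output of `_process_doc` into the column-oriented mapping that `Dataset.from_dict` expects. A standalone sketch of the same pattern, with a hypothetical `process_doc` and toy data standing in for the harness's `_process_doc` and the SCROLLS splits:

```python
from datasets import Dataset


def process_doc(doc):
    # Mirrors _process_doc's contract: a list containing one processed dict.
    return [{"id": doc["id"], "text": doc["text"].strip()}]


raw_docs = [
    {"id": "a", "text": " first document "},
    {"id": "b", "text": " second document "},
]

processed_docs = list(map(process_doc, raw_docs))
# Flatten the list of single-element lists into a flat list of dicts.
processed_docs = [item for sublist in processed_docs for item in sublist]
# Pivot rows into columns: {"id": ["a", "b"], "text": [...]}.
processed_dict = {key: [d[key] for d in processed_docs] for key in processed_docs[0]}

dataset = Dataset.from_dict(processed_dict)
print(dataset[0])  # {'id': 'a', 'text': 'first document'}
```

Returning a `Dataset` rather than a generator means the docs can be indexed and re-iterated, which is presumably why the generator-based versions broke the few-shot path.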