Unverified commit 3d1b8f43, authored by Lintang Sutawika and committed by GitHub
Browse files

Merge branch 'main' into group-agg-rework

parents e200c24e d855d0ba
include: _paloma_template
task: paloma_dolma-v1_5
task_alias: Dolma V1.5
dataset_name: dolma-v1_5
include: _paloma_template
task: paloma_dolma_100_programing_languages
task_alias: 100 PLs
dataset_name: dolma_100_programing_languages
include: _paloma_template
task: paloma_dolma_100_subreddits
task_alias: 100 Subreddits
dataset_name: dolma_100_subreddits
include: _paloma_template
task: paloma_falcon-refinedweb
task_alias: Falcon
dataset_name: falcon-refinedweb
include: _paloma_template
task: paloma_gab
task_alias: Gab
dataset_name: gab
include: _paloma_template
task: paloma_m2d2_s2orc_unsplit
task_alias: M2D2 S2ORC
dataset_name: m2d2_s2orc_unsplit
include: _paloma_template
task: paloma_m2d2_wikipedia_unsplit
task_alias: M2D2 Wikipedia
dataset_name: m2d2_wikipedia_unsplit
include: _paloma_template
task: paloma_manosphere_meta_sep
task_alias: Manosphere
dataset_name: manosphere_meta_sep
include: _paloma_template
task: paloma_mc4
task_alias: mC4
dataset_name: mc4
include: _paloma_template
task: paloma_ptb
task_alias: PTB
dataset_name: ptb
include: _paloma_template
task: paloma_redpajama
task_alias: RedPajama
dataset_name: redpajama
include: _paloma_template
task: paloma_twitterAAE_HELM_fixed
task_alias: Twitter AAE
dataset_name: twitterAAE_HELM_fixed
def doc_to_target(doc):
    """Return the document's ``"text"`` field converted to ``str``.

    Args:
        doc: Mapping-like record; only ``doc["text"]`` is read.

    Returns:
        The result of ``str(doc["text"])``.

    Raises:
        KeyError: If ``doc`` is a dict without a ``"text"`` key.
    """
    # The explicit str() keeps the return type a string even when the
    # stored value is not one.
    return str(doc["text"])
include: _paloma_template
task: paloma_wikitext_103
task_alias: Wikitext-103
dataset_name: wikitext_103
......@@ -19,3 +19,5 @@ metric_list:
higher_is_better: true
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
......@@ -4,12 +4,12 @@ from functools import reduce
import numpy as np
import transformers.data.metrics.squad_metrics as squad_metrics
from datasets import load_metric
from datasets import Dataset, load_metric
from transformers import AutoTokenizer
from lm_eval.api.instance import Instance
from lm_eval.api.metrics import mean
from lm_eval.api.task import Task
from lm_eval.api.task import ConfigurableTask
_CITATION = """
......@@ -108,7 +108,7 @@ def _num_cpu_cores():
return len(os.sched_getaffinity(0))
class _SCROLLSTask(Task):
class _SCROLLSTask(ConfigurableTask):
VERSION = 2
DATASET_PATH = "tau/scrolls"
DATASET_NAME = None
......@@ -117,7 +117,7 @@ class _SCROLLSTask(Task):
PRUNE_NUM_PROC = None
def __init__(self):
super().__init__()
super().__init__(config={"metadata": {"version": self.VERSION}})
if self.DATASET_NAME is not None:
self.metric = load_metric(_download_metric(), config_name=self.DATASET_NAME)
......@@ -131,12 +131,26 @@ class _SCROLLSTask(Task):
return False
def training_docs(self):
for doc in self.dataset["train"]:
yield from self._process_doc(doc)
processed_docs = list(map(self._process_doc, self.dataset["train"]))
# Flatten the list of lists since _process_doc returns a list of one element.
processed_docs = [item for sublist in processed_docs for item in sublist]
processed_dict = {
key: [d[key] for d in processed_docs] for key in processed_docs[0]
}
return Dataset.from_dict(processed_dict)
def validation_docs(self):
for doc in self.dataset["validation"]:
yield from self._process_doc(doc)
processed_docs = list(map(self._process_doc, self.dataset["validation"]))
# Flatten the list of lists since _process_doc returns a list of one element.
processed_docs = [item for sublist in processed_docs for item in sublist]
processed_dict = {
key: [d[key] for d in processed_docs] for key in processed_docs[0]
}
return Dataset.from_dict(processed_dict)
def should_decontaminate(self):
return True
......
......@@ -6,10 +6,7 @@ training_split: train
validation_split: validation
doc_to_text: "Q: {{context}} {{question}}\nA:"
target_delimiter: " "
doc_to_choice:
- "{{answerA}}"
- "{{answerB}}"
- "{{answerC}}"
doc_to_choice: "{{[answerA, answerB, answerC]}}"
doc_to_target: "{{ (label|int) - 1 }}"
metric_list:
- metric: acc
......
"""
"""
import re
from typing import List
......
......@@ -13,6 +13,7 @@ also determine when no answer is supported by the paragraph and abstain from ans
Homepage: https://rajpurkar.github.io/SQuAD-explorer/
"""
from functools import partial
from math import exp
......
""" This code mirrors the utils of the original winogrande task """
"""This code mirrors the utils of the original winogrande task"""
def doc_to_text(doc):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment