Commit 12852d5c authored by Leo Gao
Browse files

Implement ARC (data)

parent 9e29c2a8
from . import superglue
from . import glue
from . import arc
from . import race
TASK_REGISTRY = {
"cola": glue.CoLA,
......@@ -19,6 +20,7 @@ TASK_REGISTRY = {
"wsc": superglue.WinogradSchemaChallenge,
"arc_easy": arc.ARCEasy,
"arc_challenge": arc.ARCChallenge,
"race": race.RACE,
}
......
from . common import HFNLPTask
from ..utils_stream import X, each, apply, join, filt, one
import collections
import nlp
class RACE(HFNLPTask):
    """RACE task backed by the ``nlp`` dataset ``"race"`` / ``"high"``.

    The underlying dataset yields one record per question; this class
    regroups those records so that each document is one article together
    with all of its questions.
    """

    NLP_PATH = "race"
    NLP_NAME = "high"

    # Class-level memo of collated splits. It is shared by every instance
    # (and by subclasses that do not override it), which is why entries are
    # keyed on the dataset path and name as well as on the split.
    cache = {}

    def has_training_docs(self):
        """Report that a training split is available."""
        return True

    def has_validation_docs(self):
        """Report that a validation split is available."""
        return True

    def has_test_docs(self):
        """Report that a test split is available."""
        return True

    def _collate_data(self, set):
        """Return the documents of split ``set``, one per article.

        Each document is a dict with the keys ``'article'`` (the passage
        text) and ``'problems'`` (a list of dicts holding ``'question'``,
        ``'answer'`` and ``'options'``). Results are memoised in ``cache``.
        """
        cache_key = (self.NLP_PATH, self.NLP_NAME, set)
        if cache_key in self.cache:
            return self.cache[cache_key]

        # One big issue with HF's implementation of this dataset: it makes a
        # separate document for each question; meanwhile, in the GPT3 paper it
        # is shown that one document is made per passage.
        by_article = collections.defaultdict(list)
        for item in nlp.load_dataset(path=self.NLP_PATH, name=self.NLP_NAME)[set]:
            by_article[item['article']].append(item)

        res = [
            {
                'article': article,
                'problems': [
                    {
                        'question': problem['question'],
                        'answer': problem['answer'],
                        'options': problem['options'],
                    }
                    for problem in problems
                ],
            }
            for article, problems in by_article.items()
        ]

        self.cache[cache_key] = res
        return res

    def training_docs(self):
        """Return the collated ``"train"`` split."""
        return self._collate_data("train")

    def validation_docs(self):
        """Return the collated ``"validation"`` split."""
        return self._collate_data("validation")

    def test_docs(self):
        """Return the collated ``"test"`` split."""
        return self._collate_data("test")

    def fewshot_description(self):
        """Return the few-shot task description (currently empty)."""
        # TODO: figure out description
        return ""

    def doc_to_text(self, doc, include_target=True):
        """Render one collated document as prompt text.

        The output is the article followed by every question and the text
        of its correct option, separated by blank lines.

        NOTE(review): ``include_target`` is accepted but not used yet; the
        answer text is always included.
        """
        letters = ['A', 'B', 'C', 'D']
        rendered = [
            'Q: ' + problem['question'] + '\n\nA: '
            # 'answer' is a letter; map it to the matching entry in 'options'.
            + problem['options'][letters.index(problem['answer'])]
            for problem in doc['problems']
        ]
        return "Article:\n" + doc['article'] + '\n\n' + '\n\n'.join(rendered)

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        """Evaluate the task; not implemented yet."""
        # TODO: implement
        raise NotImplementedError()
\ No newline at end of file
import os
from functools import reduce
import operator
import lm_dataformat as lmd
from tqdm import tqdm
import json
class ExitCodeError(Exception):
    """Raised when a shell command reports a non-zero status."""
def sh(x):
    """Run the shell command ``x`` and raise if it reports failure.

    The string is handed to ``os.system`` unchanged, so it is interpreted
    by the shell; callers must not pass untrusted text.

    Raises:
        ExitCodeError: if ``os.system`` returns a non-zero status. The
            message names the command and the status to ease debugging.
    """
    status = os.system(x)
    if status:
        raise ExitCodeError(
            "command {!r} returned non-zero status {}".format(x, status)
        )
def ls(x):
    """Return the entries of directory ``x``, each prefixed with ``x``.

    Paths are built with ``os.path.join`` so that a trailing separator on
    ``x`` does not produce a doubled separator. The order is whatever
    ``os.listdir`` returns.
    """
    return [os.path.join(x, fn) for fn in os.listdir(x)]
def lsr(x):
    """Recursively list every non-directory path underneath ``x``.

    If ``x`` is not a directory (including when it does not exist) the
    result is simply ``[x]``. Otherwise the entries returned by ``ls`` are
    expanded depth-first, in listing order.
    """
    if not os.path.isdir(x):
        return [x]

    # Extend one accumulator in place rather than concatenating lists, which
    # would copy the growing result once per directory entry.
    found = []
    for child in ls(x):
        found.extend(lsr(child))
    return found
def fwrite(fname, content, encoding=None):
    """Write the string ``content`` to ``fname``, replacing the file.

    Args:
        fname: path of the file to create or truncate.
        content: text to write.
        encoding: text encoding passed to ``open``; ``None`` (the default)
            keeps the platform default, as before.
    """
    with open(fname, 'w', encoding=encoding) as fh:
        fh.write(content)
def fread(fname, encoding=None):
    """Return the whole text content of ``fname``.

    Args:
        fname: path of the file to read.
        encoding: text encoding passed to ``open``; ``None`` (the default)
            keeps the platform default, as before.
    """
    with open(fname, encoding=encoding) as fh:
        return fh.read()
class each:
    """Pipeline stage that maps a function over its left operand.

    ``xs >> each(f)`` evaluates to the list of ``f(x)`` for every ``x``
    in ``xs``.
    """

    def __init__(self, f):
        self.f = f

    def __rrshift__(self, other):
        fn = self.f
        return [fn(item) for item in other]
class filt:
    """Pipeline stage that filters its left operand with a predicate.

    ``xs >> filt(p)`` evaluates to the list of the items of ``xs`` for
    which ``p`` is truthy; a predicate of ``None`` keeps the truthy items.
    """

    def __init__(self, f):
        self.f = f

    def __rrshift__(self, other):
        keep = self.f
        if keep is None:
            # Same meaning as filter(None, ...): test each item's own truth.
            keep = bool
        return [item for item in other if keep(item)]
class apply:
    """Pipeline stage that calls a function on its whole left operand.

    ``value >> apply(f)`` evaluates to ``f(value)``.
    """

    def __init__(self, f):
        self.f = f

    def __rrshift__(self, other):
        fn = self.f
        return fn(other)
class one:
    """Pipeline stage that extracts a single value from its left operand.

    * For a ``list``: the sole element if the list has exactly one item,
      otherwise ``None``.
    * For anything else: the result of ``next(operand)``, or ``None`` if
      that raises (exhausted iterator, operand that is not an iterator, ...).
    """

    def __rrshift__(self, other):
        if isinstance(other, list):
            # Explicit length check instead of ``assert`` so the behaviour
            # is the same when Python runs with -O.
            return other[0] if len(other) == 1 else None
        try:
            return next(other)
        except Exception:
            # Best-effort by design, but let KeyboardInterrupt / SystemExit
            # propagate instead of swallowing them.
            return None
class join:
    """Pipeline stage that joins its left operand with a separator.

    ``parts >> join(sep)`` evaluates to ``sep.join(parts)``. A left operand
    of ``None``, or any failure while joining (for example non-string
    items), yields ``None`` instead of raising.
    """

    def __init__(self, sep):
        self.sep = sep

    def __rrshift__(self, other):
        if other is None:
            return None
        try:
            return self.sep.join(other)
        except Exception:
            # Best-effort by design, but let KeyboardInterrupt / SystemExit
            # propagate instead of swallowing them.
            return None
# Sentinel standing for "a second argument": Reflective's arithmetic operators
# compare their operand against Y and, on a match, return a function of two
# values (or of one pair) instead of a function of a single value.
Y = object()
def id(x):
    """Identity function: return ``x`` unchanged.

    NOTE: within this module the name shadows the builtin ``id``.
    """
    return x
def _lift_binary(op, other):
    """Build the function a Reflective operator returns.

    ``op`` takes ``(x, other_operand)``. When ``other`` is the ``Y``
    sentinel the result accepts either two arguments or a single pair and
    applies ``op`` to them; otherwise the result accepts one argument and
    applies ``op`` to it and ``other``.
    """
    if other is Y:
        def _on_pair(x, y=None):
            if y is None:
                # Called with a single pair: unpack it into both operands.
                x, y = x
            return op(x, y)
        return _on_pair
    return lambda x: op(x, other)


class Reflective:
    """Placeholder object for writing small functions as expressions.

    * ``obj.method(*args)`` returns ``lambda x: x.method(*args)``.
    * ``obj[key]`` returns ``lambda x: x[key]``.
    * ``obj * k``, ``k * obj``, ``obj + k``, ``k + obj`` return one-argument
      functions applying that operation; with ``Y`` in place of ``k`` they
      return functions of two values (or of one pair).

    Because ``__getattribute__`` is overridden, *every* attribute read on
    an instance produces such a function factory, so instances carry no
    usable attributes of their own.
    """

    def __getattribute__(self, f):
        def _fn(*args, **kwargs):
            # getattr (rather than calling x.__getattribute__ directly) also
            # honours attributes that x provides through __getattr__.
            return lambda x: getattr(x, f)(*args, **kwargs)
        return _fn

    def __getitem__(self, a):
        return lambda x: x[a]

    def __mul__(self, other):
        return _lift_binary(lambda x, y: x * y, other)

    def __rmul__(self, other):
        # Reflected form: the placeholder is the right-hand operand.
        return _lift_binary(lambda x, y: y * x, other)

    def __add__(self, other):
        return _lift_binary(lambda x, y: x + y, other)

    def __radd__(self, other):
        # Reflected form: the placeholder is the right-hand operand.
        return _lift_binary(lambda x, y: y + x, other)
# foldl :: (b -> a -> b) -> b -> [a] -> b
def foldl(f, init, arr):
    """Left fold: combine ``arr`` front to back, starting from ``init``.

    ``f`` is called as ``f(accumulator, element)``; an empty ``arr``
    returns ``init``.
    """
    return reduce(f, arr, init)
# foldr :: (a -> b -> b) -> b -> [a] -> b
def foldr(f, init, arr):
    """Right fold: combine ``arr`` back to front, starting from ``init``.

    ``f`` is called as ``f(element, accumulator)``; an empty ``arr``
    returns ``init``. ``arr`` must support slicing.
    """
    return reduce(lambda acc, elem: f(elem, acc), arr[::-1], init)
def comp(*fs):
    """Compose functions right to left: ``comp(f, g)(x) == f(g(x))``.

    A single function is returned as-is; with no functions the result is
    the identity.
    """
    if len(fs) == 1:
        return fs[0]

    def _composed(x):
        # Thread the value through the functions, last one first.
        return reduce(lambda value, fn: fn(value), fs[::-1], x)

    return _composed
# Ready-made Reflective placeholder, e.g. X['key'] builds lambda x: x['key'].
X = Reflective()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment