Commit 33f2f9bf authored by lintangsutawika
Browse files

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into superglue

parents e1fdf2a8 7634a6ec
# Task config for `xcopa_tr`: starts from the settings in default_et.yaml, then
# sets the task name, the dataset name (`tr`) and the Python function
# (`doc_to_text_tr` in utils) used to build the prompt text.
include: default_et.yaml
task: xcopa_tr
dataset_name: tr
doc_to_text: !function utils.doc_to_text_tr
# Task config for `xcopa_vi`: starts from the settings in default_et.yaml, then
# sets the task name, the dataset name (`vi`) and the Python function
# (`doc_to_text_vi` in utils) used to build the prompt text.
include: default_et.yaml
task: xcopa_vi
dataset_name: vi
doc_to_text: !function utils.doc_to_text_vi
# Task config for `xcopa_zh`: starts from the settings in default_et.yaml, then
# sets the task name, the dataset name (`zh`) and the Python function
# (`doc_to_text_zh` in utils) used to build the prompt text.
include: default_et.yaml
task: xcopa_zh
dataset_name: zh
doc_to_text: !function utils.doc_to_text_zh
from functools import partial
def convert_choice(choice: str) -> str:
    """Return *choice* with its first character lower-cased.

    The remainder of the string is left untouched. An empty string is
    returned unchanged rather than raising ``IndexError``.
    """
    # Slice instead of indexing so that "" yields "" and never raises.
    return choice[:1].lower() + choice[1:]
def doc_to_text(doc: dict, connector: dict) -> str:
    """Build the prompt prefix: the premise followed by a connective word.

    Args:
        doc: Mapping that provides a ``"premise"`` string and a ``"question"``
            value used to pick the connective.
        connector: Mapping from ``doc["question"]`` values to the connective
            word(s) appended after the premise.

    Returns:
        The whitespace-stripped premise without its final character, followed
        by a single space and the selected connective.

    Raises:
        KeyError: If ``doc["question"]`` is not a key of ``connector``.
    """
    conn = connector[doc["question"]]
    # Drop the period: the last character of the stripped premise is removed
    # unconditionally, so the premise is assumed to end with one punctuation
    # mark.
    return doc["premise"].strip()[:-1] + f" {conn}"
def doc_to_choice(doc: dict) -> list:
    """Return the two answer options of *doc* as a two-element list.

    Each of ``doc["choice1"]`` and ``doc["choice2"]`` is passed through
    ``convert_choice`` before being returned, in that order.
    """
    return [convert_choice(doc[key]) for key in ("choice1", "choice2")]
# Per-language prompt builders. Each one is `doc_to_text` with the `connector`
# mapping pre-bound, so callers only pass the document. The name suffix matches
# the suffix referenced from the task configs (e.g. `utils.doc_to_text_tr`).
doc_to_text_et = partial(doc_to_text, connector={"cause": "sest", "effect": "seetõttu"})
doc_to_text_ht = partial(doc_to_text, connector={"cause": "poukisa", "effect": "donk sa"})
doc_to_text_it = partial(doc_to_text, connector={"cause": "perché", "effect": "quindi"})
doc_to_text_id = partial(doc_to_text, connector={"cause": "karena", "effect": "maka"})
doc_to_text_qu = partial(doc_to_text, connector={"cause": "imataq", "effect": "chaymi"})
doc_to_text_sw = partial(doc_to_text, connector={"cause": "kwa sababu", "effect": "kwa hiyo"})
doc_to_text_zh = partial(doc_to_text, connector={"cause": "因为", "effect": "所以"})
doc_to_text_ta = partial(doc_to_text, connector={"cause": "காரணமாக", "effect": "எனவே"})
doc_to_text_th = partial(doc_to_text, connector={"cause": "เพราะ", "effect": "ดังนั้น"})
doc_to_text_tr = partial(doc_to_text, connector={"cause": "çünkü", "effect": "bu yüzden"})
doc_to_text_vi = partial(doc_to_text, connector={"cause": "bởi vì", "effect": "vì vậy"})
......@@ -456,7 +456,7 @@ env = Environment(loader=BaseLoader, undefined=StrictUndefined)
env.filters["regex_replace"] = regex_replace
def apply_template(template, doc):
def apply_template(template: str, doc: dict) -> str:
rtemplate = env.from_string(template)
return rtemplate.render(**doc)
......
......@@ -32,7 +32,7 @@ def parse_args():
default=None,
help="Number of examples in few-shot context",
)
parser.add_argument("--batch_size", type=int, default=1) # TODO: only integers
parser.add_argument("--batch_size", type=str, default=1)
parser.add_argument(
"--max_batch_size",
type=int,
......
......@@ -15,7 +15,7 @@ setuptools.setup(
packages=setuptools.find_packages(),
# required to include yaml files in pip installation
package_data={
"lm_eval": ["**/*.yaml"],
"lm_eval": ["**/*.yaml", "tasks/**/*"],
"examples": ["**/*.yaml"],
},
entry_points={
......@@ -36,7 +36,6 @@ setuptools.setup(
"evaluate>=0.4.0",
"jsonlines",
"numexpr",
"openai>=0.6.4",
"omegaconf>=2.2",
"peft>=0.2.0",
"pybind11>=2.6.2",
......@@ -67,5 +66,6 @@ setuptools.setup(
],
"gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
"anthropic": ["anthropic"],
"openai": ["openai", "tiktoken"],
},
)
......@@ -92,7 +92,7 @@ class TestNewTasks:
if task.has_test_docs()
else list(islice(task.validation_docs(), limit))
)
if "multiple_choice" in task._config.group:
if "multiple_choice" in task._config.output_type:
_array = [task.doc_to_choice(doc) for doc in arr]
# assert all(len(x) == 4 for x in _array)
assert all(isinstance(x, list) for x in _array)
......@@ -106,8 +106,8 @@ class TestNewTasks:
else list(islice(task.validation_docs(), limit))
)
_array_target = [task.doc_to_target(doc) for doc in arr]
assert all(isinstance(label, int) for label in _array_target)
assert len(_array_target) == limit if limit else True
if task._config.output_type == "multiple_choice":
assert all(isinstance(label, int) for label in _array_target)
# _array_text = [task.doc_to_text(doc) for doc in arr]
# Not working
# assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))
......@@ -116,6 +116,7 @@ class TestNewTasks:
task_class().build_all_requests(rank=1, limit=limit, world_size=1)
assert task_class.instances is not None
# TODO: Add proper testing
def test_construct_requests(self, task_class, limit):
task = task_class()
arr = (
......@@ -124,5 +125,5 @@ class TestNewTasks:
else list(islice(task.validation_docs(), limit))
)
requests = [task.construct_requests(doc, task.doc_to_text(doc)) for doc in arr]
assert all(isinstance(doc, list) for doc in requests)
# assert all(isinstance(doc, list) for doc in requests)
assert len(requests) == limit if limit else True
......@@ -83,7 +83,7 @@ def test_create_choices(task_class, limit):
if task.has_test_docs()
else list(islice(task.validation_docs(), limit))
)
if "multiple_choice" in task._config.group:
if "multiple_choice" in task._config.output_type:
_array = [task.doc_to_choice(doc) for doc in arr]
# assert all(len(x) == 4 for x in _array)
assert all(isinstance(x, list) for x in _array)
......@@ -98,8 +98,8 @@ def test_doc_to_target(task_class, limit):
else list(islice(task.validation_docs(), limit))
)
_array_target = [task.doc_to_target(doc) for doc in arr]
assert all(isinstance(label, int) for label in _array_target)
assert len(_array_target) == limit if limit else True
if task._config.output_type == "multiple_choice":
assert all(isinstance(label, int) for label in _array_target)
# _array_text = [task.doc_to_text(doc) for doc in arr]
# Not working
# assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))
......@@ -110,6 +110,7 @@ def test_build_all_requests(task_class, limit):
assert task_class.instances is not None
# TODO: Add proper testing
def test_construct_requests(task_class, limit):
task = task_class()
arr = (
......@@ -118,7 +119,7 @@ def test_construct_requests(task_class, limit):
else list(islice(task.validation_docs(), limit))
)
requests = [task.construct_requests(doc, task.doc_to_text(doc)) for doc in arr]
assert all(isinstance(doc, list) for doc in requests)
# assert all(isinstance(doc, list) for doc in requests)
assert len(requests) == limit if limit else True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment