Unverified Commit 4721379e authored by Hailey Schoelkopf's avatar Hailey Schoelkopf Committed by GitHub
Browse files

Merge branch 'big-refactor' into mypy

parents a551c789 cc7828dd
task: wsc273
dataset_path: winograd_wsc
dataset_name: wsc273
output_type: multiple_choice
test_split: test
doc_to_text: label
process_docs: !function utils.process_doc
doc_to_target: "{% set index = pronoun_loc + pronoun | length %}{{text[index:]}}"
doc_to_choice: "{% set template = text[:pronoun_loc] %}{{[template+options[0], template+options[1]]}}"
should_decontaminate: true
doc_to_decontamination_query: text
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
upper_pronouns = [
"A",
"An",
"The",
"She",
"He",
"It",
"They",
"My",
"His",
"Her",
"Their",
]
def process_doc(dataset):
def process_fn(doc):
# The HF implementation of `wsc273` is not `partial evaluation` friendly.
doc["text"] = doc["text"].replace(" ", " ")
doc["options"][0] = __normalize_option(doc, doc["options"][0])
doc["options"][1] = __normalize_option(doc, doc["options"][1])
return doc
return dataset.map(process_fn)
def __normalize_option(doc, option):
# Append `'s` to possessive determiner based options.
if doc["pronoun"].lower() in ["my", "his", "her", "our", "their"]:
option += "'s"
# Appropriately lowercase the pronoun in the option.
pronoun = option.split()[0]
start_of_sentence = doc["text"][doc["pronoun_loc"] - 2] == "."
if not start_of_sentence and pronoun in upper_pronouns:
return option.replace(pronoun, pronoun.lower())
return option
...@@ -11,6 +11,7 @@ from lm_eval import evaluator, utils ...@@ -11,6 +11,7 @@ from lm_eval import evaluator, utils
from lm_eval.api.registry import ALL_TASKS from lm_eval.api.registry import ALL_TASKS
from lm_eval.logger import eval_logger from lm_eval.logger import eval_logger
from lm_eval.tasks import include_task_folder from lm_eval.tasks import include_task_folder
from lm_eval.benchmarks import include_benchmarks
os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["TOKENIZERS_PARALLELISM"] = "false"
......
...@@ -53,7 +53,7 @@ setuptools.setup( ...@@ -53,7 +53,7 @@ setuptools.setup(
], ],
python_requires=">=3.9", python_requires=">=3.9",
install_requires=[ install_requires=[
"accelerate>=0.18.0", "accelerate>=0.21.0",
"evaluate", "evaluate",
"datasets>=2.0.0", "datasets>=2.0.0",
"evaluate>=0.4.0", "evaluate>=0.4.0",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment