Unverified commit f27c4050, authored by Zehan Li and committed by GitHub
Browse files

Fix m_arc choices (#1760)



* Update utils.py

This is a 4-choice task; option_e is null for all but 3 samples.

* Fix options

Adaptive choices

* add option e

* bump multilingual arc version

---------
Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>
parent b898bdaa
...@@ -20,4 +20,4 @@ metric_list: ...@@ -20,4 +20,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
version: 1.0 version: 2.0
...@@ -4,8 +4,6 @@ import datasets ...@@ -4,8 +4,6 @@ import datasets
def preprocess(text): def preprocess(text):
if text is None:
return " "
text = text.strip() text = text.strip()
text = text.replace(" [title]", ". ") text = text.replace(" [title]", ". ")
text = re.sub("\\[.*?\\]", "", text) text = re.sub("\\[.*?\\]", "", text)
...@@ -20,11 +18,15 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: ...@@ -20,11 +18,15 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
"id": doc["id"], "id": doc["id"],
"query": "Question: " + preprocess(doc["instruction"]) + "\nAnswer:", "query": "Question: " + preprocess(doc["instruction"]) + "\nAnswer:",
"choices": [ "choices": [
preprocess(doc["option_a"]), preprocess(option)
preprocess(doc["option_b"]), for option in [
preprocess(doc["option_c"]), doc["option_a"],
preprocess(doc["option_d"]), doc["option_b"],
preprocess(doc["option_e"]), doc["option_c"],
doc["option_d"],
doc["option_e"],
]
if option
], ],
"gold": ["A", "B", "C", "D", "E"].index(doc["answer"]), "gold": ["A", "B", "C", "D", "E"].index(doc["answer"]),
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment