Commit 1b14602e authored by h-albert-lee
Browse files

implementing kmmlu

parent e5dfd030
dataset_name: Public Safety
include: _default_kmmlu_yaml
task: kmmlu_public safety
dataset_name: Railway and Automotive Engineering
include: _default_kmmlu_yaml
task: kmmlu_railway and automotive engineering
dataset_name: Real Estate
include: _default_kmmlu_yaml
task: kmmlu_real estate
dataset_name: Refrigerating Machinery
include: _default_kmmlu_yaml
task: kmmlu_refrigerating machinery
dataset_name: Social Welfare
include: _default_kmmlu_yaml
task: kmmlu_social welfare
dataset_name: Taxation
include: _default_kmmlu_yaml
task: kmmlu_taxation
dataset_name: Telecommunications and Wireless Technology
include: _default_kmmlu_yaml
task: kmmlu_telecommunications and wireless technology
import datasets
def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Turn every raw record into a four-way multiple-choice prompt.

    Args:
        dataset: Records exposing the ``context``, ``question``,
            ``option#1`` .. ``option#4`` and ``gold`` fields read below.
            NOTE(review): confirm these field names match the schema of the
            dataset this task actually loads.

    Returns:
        The result of ``dataset.map`` applied with the per-record converter.
    """

    def _process_doc(doc):
        """Build the prompt, the choice labels and the gold index for one record."""
        # The template lines are flush-left on purpose: any leading whitespace
        # inside the triple-quoted literal would become part of the prompt.
        instruction = f"""다음을 읽고 정답으로 알맞은 것을 고르시요.
### Context: {doc["context"]}
### Question: {doc["question"]}
### Options:
(1) {doc['option#1']}\n(2) {doc["option#2"]}\n(3) {doc["option#3"]}\n(4) {doc['option#4']}
### Answer: 주어진 문제의 정답은"""
        out_doc = {
            "question": instruction,
            "choices": ["(1)", "(2)", "(3)", "(4)"],
            # Shift the label down by one so it indexes into ``choices``
            # (presumably ``gold`` is stored 1-based; verify against the data).
            "gold": int(doc["gold"]) - 1,
        }
        return out_doc

    return dataset.map(_process_doc)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment