Unverified commit 9e03d9d0, authored by seungduk.kim.2304 and committed by GitHub
Browse files

Fix Column Naming and Dataset Naming Conventions in K-MMLU Evaluation (#1171)



* Correct column names and dataset names

* Remove kmmlu_general_physics.yaml and kmmlu_korean_language.yaml

* Update _default_kmmlu_yaml

---------
Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>
parent 78545d42
"dataset_name": "Materials Engineering"
"dataset_name": "Materials-Engineering"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_materials_engineering"
"dataset_name": "Mechanical Engineering"
"dataset_name": "Mechanical-Engineering"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_mechanical_engineering"
"dataset_name": "Nondestructive Testing"
"dataset_name": "Nondestructive-Testing"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_nondestructive_testing"
"dataset_name": "Political Science and Sociology"
"dataset_name": "Political-Science-and-Sociology"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_political_science_and_sociology"
"dataset_name": "Public Safety"
"dataset_name": "Public-Safety"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_public_safety"
"dataset_name": "Railway and Automotive Engineering"
"dataset_name": "Railway-and-Automotive-Engineering"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_railway_and_automotive_engineering"
"dataset_name": "Real Estate"
"dataset_name": "Real-Estate"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_real_estate"
"dataset_name": "Refrigerating Machinery"
"dataset_name": "Refrigerating-Machinery"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_refrigerating_machinery"
"dataset_name": "Social Welfare"
"dataset_name": "Social-Welfare"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_social_welfare"
"dataset_name": "Telecommunications and Wireless Technology"
"dataset_name": "Telecommunications-and-Wireless-Technology"
"include": "_default_kmmlu_yaml"
"task": "kmmlu_telecommunications_and_wireless_technology"
......@@ -7,7 +7,7 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
f"다음을 읽고 정답으로 알맞은 것을 고르시요.\n"
f"### Question: {doc['question']}\n"
f"### Options:\n"
f"(1) {doc['option#1']}\n(2) {doc['option#2']}\n(3) {doc['option#3']}\n(4) {doc['option#4']}\n"
f"(1) {doc['A']}\n(2) {doc['B']}\n(3) {doc['C']}\n(4) {doc['D']}\n"
f"### Answer: 주어진 문제의 정답은"
)
out_doc = {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment