Commit d62ce606 authored by Yen-Ting Lin
Browse files

add tmlu

parent 1ce8c97a
"dataset_name": "GSAT_civics"
"description": "以下為學測公民的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"task": "tmlu_GSAT_civics"
"task_alias": "GSAT civics"
"dataset_name": "GSAT_earth_science"
"description": "以下為學測地球科學的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
- "E"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_STEM"
"group_alias": "STEM"
"include": "_default_template_yaml"
"task": "tmlu_GSAT_earth_science"
"task_alias": "GSAT earth science"
"dataset_name": "GSAT_geography"
"description": "以下為學測地理的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"task": "tmlu_GSAT_geography"
"task_alias": "GSAT geography"
"dataset_name": "GSAT_history"
"description": "以下為學測歷史的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "tmlu_GSAT_history"
"task_alias": "GSAT history"
"dataset_name": "accountant"
"description": "以下為會計師的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"task": "tmlu_accountant"
"task_alias": "accountant"
"dataset_name": "basic_traditional_chinese_medicine"
"description": "以下為中醫基礎醫學的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "tmlu_basic_traditional_chinese_medicine"
"task_alias": "basic traditional chinese medicine"
"dataset_name": "clinical_psychologist"
"description": "以下為臨床心理師的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"task": "tmlu_clinical_psychologist"
"task_alias": "clinical psychologist"
"dataset_name": "clinical_traditional_chinese_medicine"
"description": "以下為中醫針灸的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "tmlu_clinical_traditional_chinese_medicine"
"task_alias": "clinical traditional chinese medicine"
"dataset_name": "driving_rule"
"description": "以下為台灣駕駛規則的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_Taiwan Specific"
"group_alias": "Taiwan Specific"
"include": "_default_template_yaml"
"task": "tmlu_driving_rule"
"task_alias": "driving rule"
"dataset_name": "lawyer_qualification"
"description": "以下為律師資格的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "tmlu_lawyer_qualification"
"task_alias": "lawyer qualification"
"dataset_name": "nutritionist"
"description": "以下為營養師的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_other"
"group_alias": "other"
"include": "_default_template_yaml"
"task": "tmlu_nutritionist"
"task_alias": "nutritionist"
"dataset_name": "taiwan_tourist_resources"
"description": "以下為台灣觀光資源的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_Taiwan Specific"
"group_alias": "Taiwan Specific"
"include": "_default_template_yaml"
"task": "tmlu_taiwan_tourist_resources"
"task_alias": "taiwan tourist resources"
"dataset_name": "teacher_qualification"
"description": "以下為教師資格的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_Taiwan Specific"
"group_alias": "Taiwan Specific"
"include": "_default_template_yaml"
"task": "tmlu_teacher_qualification"
"task_alias": "teacher qualification"
"dataset_name": "tour_guide"
"description": "以下為導遊的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "tmlu_tour_guide"
"task_alias": "tour guide"
"dataset_name": "tour_leader"
"description": "以下為領隊的單選題,請提供正確答案的選項。\n\n"
"doc_to_choice":
- "A"
- "B"
- "C"
- "D"
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\n\
D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\
\ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\
\ endif %}\nAnswer:"
"group": "tmlu_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"task": "tmlu_tour_leader"
"task_alias": "tour leader"
import datasets
def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Derive multiple-choice fields for every document in ``dataset``.

    Each document must provide the option texts under ``"A"``-``"D"``, the
    question under ``"question"`` and the correct option letter under
    ``"answer"``. Options ``"E"`` and ``"F"`` are optional and are only
    included when present and non-empty.

    Args:
        dataset: Object whose ``map`` method applies a per-document
            function to each document.

    Returns:
        The result of ``dataset.map`` applied with the per-document helper,
        which produces the keys ``"questions"``, ``"choices"`` and ``"goal"``
        (the zero-based index of the correct option within ``"choices"``).

    Raises:
        KeyError: If a document lacks ``"A"``-``"D"``, ``"question"`` or
            ``"answer"``.
        ValueError: If a document's ``"answer"`` is not one of the option
            labels available for that document.
    """

    def _helper(doc):
        # Builds the derived fields for a single document in our dataset.
        answer_list = ["A", "B", "C", "D"]
        choices = [doc[label] for label in answer_list]

        for label in ("E", "F"):
            option = doc.get(label)
            # Only None and "" count as "no such option"; an explicit check
            # keeps a real but falsy option (e.g. the number 0).
            if option is None or option == "":
                continue
            answer_list.append(label)
            choices.append(option)

        answer = doc["answer"]
        try:
            goal = answer_list.index(answer)
        except ValueError:
            # Same exception type as before, but say which answer was
            # rejected and which labels were available.
            raise ValueError(
                f"answer {answer!r} is not among the available option "
                f"labels {answer_list}"
            ) from None

        # NOTE(review): the output key is "questions" (plural) while the
        # source field is "question" - confirm consumers expect this spelling.
        return {
            "questions": doc["question"],
            "choices": choices,
            "goal": goal,
        }

    return dataset.map(_helper)  # returns back a datasets.Dataset object
category subject name # Questions # Choices
civics AST_civics 分科測驗公民 57 4
geography AST_geography 分科測驗地理 58 4
civics CAP_civics 會考公民 73 4
geography CAP_geography 會考地理 45 4
civics GSAT_civics 學測公民 73 4
geography GSAT_geography 學測地理 49 4
accounting accountant 會計師 117 4
psychologist clinical_psychologist 臨床心理師 117 4
biology AST_biology 分科測驗生物 40 4
chemistry AST_chemistry 分科測驗化學 34 5
mathematics AST_mathematics 分科測驗數學 25 5
physics AST_physics 分科測驗物理 43 5
biology CAP_biology 會考生物 27 4
chemistry CAP_chemistry 會考化學 27 4
earth science CAP_earth_science 會考地球科學 15 4
mathematics CAP_mathematics 會考數學 115 4
physics CAP_physics 會考物理 15 4
biology GSAT_biology 學測生物 21 5
chemistry GSAT_chemistry 學測化學 29 5
earth science GSAT_earth_science 學測地球科學 24 5
mathematics GSAT_mathematics 學測數學 29 5
physics GSAT_physics 學測物理 24 5
Chinese AST_chinese 分科測驗國文 131 4
history AST_history 分科測驗歷史 56 4
Chinese CAP_chinese 會考國文 61 4
history CAP_history 會考歷史 56 4
Chinese GSAT_chinese 學測國文 97 4
history GSAT_history 學測歷史 85 4
Tour tour_guide 導遊 99 4
Tour tour_leader 領隊 145 4
law lawyer_qualification 律師資格 279 4
Taiwan Specific driving_rule 台灣駕駛規則 432 4
Taiwan Specific teacher_qualification 教師資格 75 4
Taiwan Specific taiwan_tourist_resources 台灣觀光資源 50 4
Medicine basic_traditional_chinese_medicine 中醫基礎醫學 159 4
Medicine clinical_traditional_chinese_medicine 中醫針灸 79 4
Nutritionist nutritionist 營養師 120 4
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment