Unverified commit ff2c49ff, authored by Gyouk Chu, committed by GitHub
Browse files

Update KorMedMCQA: ver 2.0 (#2540)

* Update KorMedMCQA: ver 2.0

* Fix pre-commit formatting issues

* Update KorMedMCQA v2.0

* pre-commit
parent 3a4e4674
...@@ -25,20 +25,21 @@ Homepage: https://huggingface.co/datasets/sean0042/KorMedMCQA ...@@ -25,20 +25,21 @@ Homepage: https://huggingface.co/datasets/sean0042/KorMedMCQA
### Groups and Tasks ### Groups and Tasks
* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, and `kormedmcqa_pharm`. * `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, `kormedmcqa_pharm`, and `kormedmcqa_dentist`.
#### Tasks #### Tasks
* `kormedmcqa_doctor`: `Official Korean Doctor Examination` * `kormedmcqa_doctor`: `Official Korean Doctor Examination`
* `kormedmcqa_nurse`: `Official Korean Nurse Examination` * `kormedmcqa_nurse`: `Official Korean Nurse Examination`
* `kormedmcqa_pharm`: `Official Korean Pharmacist Examination` * `kormedmcqa_pharm`: `Official Korean Pharmacist Examination`
* `kormedmcqa_dentist`: `Official Korean Dentist Examination`
### Checklist ### Checklist
For adding novel benchmarks/datasets to the library: For adding novel benchmarks/datasets to the library:
* [x] Is the task an existing benchmark in the literature? * [ ] Is the task an existing benchmark in the literature?
* [x] Have you referenced the original paper that introduced the task? * [ ] Have you referenced the original paper that introduced the task?
* [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
If other tasks on this dataset are already supported: If other tasks on this dataset are already supported:
......
...@@ -3,9 +3,10 @@ task: ...@@ -3,9 +3,10 @@ task:
- kormedmcqa_doctor - kormedmcqa_doctor
- kormedmcqa_nurse - kormedmcqa_nurse
- kormedmcqa_pharm - kormedmcqa_pharm
- kormedmcqa_dentist
aggregate_metric_list: aggregate_metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
weight_by_size: true weight_by_size: true
metadata: metadata:
version: 0.0 version: 2.0
task : kormedmcqa_doctor
dataset_path : sean0042/KorMedMCQA dataset_path : sean0042/KorMedMCQA
dataset_name : doctor
test_split : test test_split : test
fewshot_split : dev fewshot_split : fewshot
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답: {{['A', 'B', 'C', 'D', 'E'][answer-1]}}\n\n"
doc_to_target: ""
output_type: generate_until output_type: generate_until
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:" doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:"
doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}"
...@@ -16,11 +16,18 @@ metric_list: ...@@ -16,11 +16,18 @@ metric_list:
ignore_punctuation: true ignore_punctuation: true
regexes_to_ignore: regexes_to_ignore:
- " " - " "
- "\n"
generation_kwargs: generation_kwargs:
until: until:
- "Q:" - "Q:"
- "\n\n"
- "</s>" - "</s>"
- "<|im_end|>"
- "." - "."
- "\n\n"
do_sample: false do_sample: false
temperature: 0.0 temperature: 0.0
max_gen_toks: 1024
metadata:
version: 2.0
dataset_kwargs:
trust_remote_code: true
# KorMedMCQA dentist subset: names the task, selects the `dentist` dataset
# configuration, and pulls every other setting from `_template_yaml`.
task: kormedmcqa_dentist
dataset_name: dentist
include: _template_yaml
# KorMedMCQA doctor subset: names the task, selects the `doctor` dataset
# configuration, and pulls every other setting from `_template_yaml`.
task: kormedmcqa_doctor
dataset_name: doctor
include: _template_yaml
# Stand-alone task config for the `nurse` subset of sean0042/KorMedMCQA.
# The model generates free text, which is scored by exact match against the
# gold option letter.
task: kormedmcqa_nurse
dataset_path: sean0042/KorMedMCQA
dataset_name: nurse
test_split: test
fewshot_split: dev
fewshot_config:
  # NOTE(review): `first_n` presumably takes the leading examples of the
  # few-shot split in order -- confirm against the harness sampler registry.
  sampler: first_n
output_type: generate_until
# Prompt layout: stripped question, options A-E on separate lines, then the
# answer cue "정답:".
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:"
# The template subtracts 1 from `answer` before indexing the letter list.
doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}"
metric_list:
  # The normalisation options below belong to this single metric entry, so
  # they must be nested under it rather than sit at the top level.
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - " "
generation_kwargs:
  # Generation stops at the first of these strings.
  until:
    - "Q:"
    - "\n\n"
    - "</s>"
    - "."
  do_sample: false
  temperature: 0.0
# Stand-alone task config for the `pharm` subset of sean0042/KorMedMCQA.
# The model generates free text, which is scored by exact match against the
# gold option letter.
task: kormedmcqa_pharm
dataset_path: sean0042/KorMedMCQA
dataset_name: pharm
test_split: test
fewshot_split: dev
fewshot_config:
  # NOTE(review): `first_n` presumably takes the leading examples of the
  # few-shot split in order -- confirm against the harness sampler registry.
  sampler: first_n
output_type: generate_until
# Prompt layout: stripped question, options A-E on separate lines, then the
# answer cue "정답:".
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:"
# The template subtracts 1 from `answer` before indexing the letter list.
doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}"
metric_list:
  # The normalisation options below belong to this single metric entry, so
  # they must be nested under it rather than sit at the top level.
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - " "
generation_kwargs:
  # Generation stops at the first of these strings.
  until:
    - "Q:"
    - "\n\n"
    - "</s>"
    - "."
  do_sample: false
  temperature: 0.0
# KorMedMCQA nurse subset: names the task, selects the `nurse` dataset
# configuration, and pulls every other setting from `_template_yaml`.
task: kormedmcqa_nurse
dataset_name: nurse
include: _template_yaml
# KorMedMCQA pharm subset: names the task, selects the `pharm` dataset
# configuration, and pulls every other setting from `_template_yaml`.
task: kormedmcqa_pharm
dataset_name: pharm
include: _template_yaml
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment