Unverified commit ad872a5d, authored by Xiaoming Shi, committed by GitHub
Browse files

[Feature] Update MedBench (#779)



* update medbench

* medbench update

* format medbench

* format

* Update

* update

* update

* update suffix

---------
Co-authored-by: 施晓明 <PJLAB\shixiaoming@pjnl104220118l.pjlab.org>
Co-authored-by: Leymore <zfz-960727@163.com>
parent a74e4c1a
...@@ -6,7 +6,7 @@ exclude: | ...@@ -6,7 +6,7 @@ exclude: |
opencompass/openicl/icl_evaluator/hf_metrics/| opencompass/openicl/icl_evaluator/hf_metrics/|
opencompass/datasets/lawbench/utils| opencompass/datasets/lawbench/utils|
opencompass/datasets/lawbench/evaluation_functions/| opencompass/datasets/lawbench/evaluation_functions/|
opencompass/datasets/medbench| opencompass/datasets/medbench/|
docs/zh_cn/advanced_guides/compassbench_intro.md docs/zh_cn/advanced_guides/compassbench_intro.md
) )
repos: repos:
......
...@@ -2,41 +2,24 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate ...@@ -2,41 +2,24 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ( from opencompass.datasets import MedBenchDataset, MedBenchEvaluator, MedBenchEvaluator_Cloze, MedBenchEvaluator_IE, MedBenchEvaluator_mcq, MedBenchEvaluator_CMeEE, MedBenchEvaluator_CMeIE, MedBenchEvaluator_CHIP_CDEE, MedBenchEvaluator_CHIP_CDN, MedBenchEvaluator_CHIP_CTC, MedBenchEvaluator_NLG, MedBenchEvaluator_TF, MedBenchEvaluator_DBMHG, MedBenchEvaluator_SMDoc, MedBenchEvaluator_IMCS_V2_MRG
MedBenchDataset,
MedBenchEvaluator,
MedBenchEvaluator_Cloze,
MedBenchEvaluator_IE,
MedBenchEvaluator_mcq,
MedBenchEvaluator_CMeEE,
MedBenchEvaluator_CMeIE,
MedBenchEvaluator_CHIP_CDEE,
MedBenchEvaluator_CHIP_CDN,
MedBenchEvaluator_CHIP_CTC,
MedBenchEvaluator_NLG,
MedBenchEvaluator_TF,
MedBenchEvaluator_EMR,
)
from opencompass.utils.text_postprocessors import first_capital_postprocess from opencompass.utils.text_postprocessors import first_capital_postprocess
medbench_reader_cfg = dict( medbench_reader_cfg = dict(
input_columns=['problem_input'], output_column='label') input_columns=['problem_input'], output_column='label')
medbench_multiple_choices_sets = ['Health_exam', 'DDx-basic', 'DDx-advanced_pre', 'DDx-advanced_final', 'SafetyBench'] # 选择题,用acc判断 medbench_multiple_choices_sets = ['Med-Exam', 'DDx-basic', 'DDx-advanced', 'SafetyBench'] # 选择题,用acc判断
medbench_qa_sets = ['Health_Counseling', 'Medicine_Counseling', 'MedDG', 'MedSpeQA', 'MedTreat', 'CMB-Clin'] # 开放式QA,有标答 medbench_qa_sets = ['MedHC', 'MedMC', 'MedDG', 'MedSpeQA', 'MedTreat', 'CMB-Clin'] # 开放式QA,有标答
medbench_cloze_sets = ['Triage'] # 限定域QA,有标答 medbench_cloze_sets = ['MedHG'] # 限定域QA,有标答
medbench_single_choice_sets = ['Medicine_attack'] # 正确与否判断,有标答 medbench_single_choice_sets = ['DrugCA'] # 正确与否判断,有标答
medbench_ie_sets = ['EMR', 'CMeEE'] # 判断识别的实体是否一致,用F1评价 medbench_ie_sets = ['DBMHG', 'CMeEE', 'CMeIE', 'CHIP-CDEE', 'CHIP-CDN', 'CHIP-CTC', 'SMDoc', 'IMCS-V2-MRG'] # 判断识别的实体是否一致,用F1评价
#, 'CMeIE', 'CHIP_CDEE', 'CHIP_CDN', 'CHIP_CTC', 'Doc_parsing', 'MRG'
medbench_datasets = [] medbench_datasets = []
for name in medbench_single_choice_sets: for name in medbench_single_choice_sets:
medbench_infer_cfg = dict( medbench_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
...@@ -144,7 +127,7 @@ for name in medbench_ie_sets: ...@@ -144,7 +127,7 @@ for name in medbench_ie_sets:
inferencer=dict(type=GenInferencer)) inferencer=dict(type=GenInferencer))
medbench_eval_cfg = dict( medbench_eval_cfg = dict(
evaluator=dict(type=eval('MedBenchEvaluator_'+name)), pred_role="BOT") evaluator=dict(type=eval('MedBenchEvaluator_'+name.replace('-', '_'))), pred_role="BOT")
medbench_datasets.append( medbench_datasets.append(
dict( dict(
...@@ -157,4 +140,4 @@ for name in medbench_ie_sets: ...@@ -157,4 +140,4 @@ for name in medbench_ie_sets:
infer_cfg=medbench_infer_cfg.copy(), infer_cfg=medbench_infer_cfg.copy(),
eval_cfg=medbench_eval_cfg.copy())) eval_cfg=medbench_eval_cfg.copy()))
del name, medbench_infer_cfg, medbench_eval_cfg del name, medbench_infer_cfg, medbench_eval_cfg
\ No newline at end of file
...@@ -11,31 +11,31 @@ from .constructions import ChatGPTSchema, ResultsForHumanSchema ...@@ -11,31 +11,31 @@ from .constructions import ChatGPTSchema, ResultsForHumanSchema
from .utils import extract_answer, read_jsonl, save_jsonl from .utils import extract_answer, read_jsonl, save_jsonl
# define the datasets # define the datasets
medbench_multiple_choices_sets = ['Health_exam', 'DDx-basic', 'DDx-advanced_pre', 'DDx-advanced_final', 'SafetyBench'] # 选择题,用acc判断 medbench_multiple_choices_sets = ['Med-Exam', 'DDx-basic', 'DDx-advanced', 'DDx-advanced', 'SafetyBench'] # 选择题,用acc判断
medbench_qa_sets = ['Health_Counseling', 'Medicine_Counseling', 'MedDG', 'MedSpeQA', 'MedTreat', 'CMB-Clin'] # 开放式QA,有标答 medbench_qa_sets = ['MedHC', 'MedMC', 'MedDG', 'MedSpeQA', 'MedTreat', 'CMB-Clin'] # 开放式QA,有标答
medbench_cloze_sets = ['Triage'] # 限定域QA,有标答 medbench_cloze_sets = ['MedHG'] # 限定域QA,有标答
medbench_single_choice_sets = ['Medicine_attack'] # 正确与否判断,有标答 medbench_single_choice_sets = ['DrugCA'] # 正确与否判断,有标答
medbench_ie_sets = ['EMR', 'CMeEE'] # 判断识别的实体是否一致,用F1评价 medbench_ie_sets = ['DBMHG', 'CMeEE', 'CMeIE', 'CHIP-CDEE', 'CHIP-CDN', 'CHIP-CTC', 'SMDoc', 'IMCS-V2-MRG'] # 判断识别的实体是否一致,用F1评价
def convert_zero_shot(line, dataset_name): def convert_zero_shot(line, dataset_name):
# passage = line['passage'] if line['passage'] is not None else '' # passage = line['passage'] if line['passage'] is not None else ''
if dataset_name in medbench_qa_sets: # if dataset_name in medbench_qa_sets:
return line['question'] # return line['question']
elif dataset_name in medbench_cloze_sets: # elif dataset_name in medbench_cloze_sets:
return '问题:' + line['question'] + '\n答案:' # return '问题:' + line['question'] + '\n答案:'
elif dataset_name in medbench_multiple_choices_sets: # elif dataset_name in medbench_multiple_choices_sets:
return '问题:' + line['question'] + ' ' \ # return '问题:' + line['question'] + ' ' \
+ '选项:' + ' '.join(line['options']) + '\n从A到G,我们应该选择' # + '选项:' + ' '.join(line['options']) + '\n从A到G,我们应该选择'
else: # else:
return line['question'] # return line['question']
return line['question']
prefix = '该问题为单选题,所有选项中必有一个正确答案,且只有一个正确答案。\n' prefix = '该问题为单选题,所有选项中必有一个正确答案,且只有一个正确答案。\n'
# def convert_zero_shot_CoT_stage1(line, dataset_name): # def convert_zero_shot_CoT_stage1(line, dataset_name):
# try: # try:
# passage = line['passage'] if line['passage'] is not None else '' # passage = line['passage'] if line['passage'] is not None else ''
......
This diff is collapsed.
This diff is collapsed.
...@@ -148,8 +148,8 @@ def parse_math_answer(setting_name, raw_string): ...@@ -148,8 +148,8 @@ def parse_math_answer(setting_name, raw_string):
last_match = None last_match = None
if '=' in s: if '=' in s:
last_match = s.split('=')[-1].lstrip(' ').rstrip('.') last_match = s.split('=')[-1].lstrip(' ').rstrip('.')
if '\\n' in last_match: if '\n' in last_match:
last_match = last_match.split('\\n')[0] last_match = last_match.split('\n')[0]
else: else:
pattern = '(?:\\$)?\d+(?:\.\d+)?(?![\w\d])' pattern = '(?:\\$)?\d+(?:\.\d+)?(?![\w\d])'
matches = re.findall(pattern, s) matches = re.findall(pattern, s)
...@@ -170,6 +170,8 @@ def parse_math_answer(setting_name, raw_string): ...@@ -170,6 +170,8 @@ def parse_math_answer(setting_name, raw_string):
def parse_qa_multiple_answer(string): def parse_qa_multiple_answer(string):
# if setting_name == 'few-shot-CoT': # if setting_name == 'few-shot-CoT':
# string = extract_last_line(string) # string = extract_last_line(string)
for x in ['CC', 'CA', 'AC', 'POMES', 'AI', 'MIBG', 'CF', 'CTE', 'AD', 'CB', 'BG', 'BD', 'BE', 'BH', 'CTB', 'BI', 'CE', 'Pugh', 'Child', 'CTI', 'CTA', 'TACE', 'PPD', 'Castleman', 'BA', 'CH', 'AB', 'CTC', 'CT', 'CTH', 'CD', 'AH', 'AE', 'AA', 'AF', 'BC', 'CG', 'BB', 'CI', 'BF', 'CTF', 'CTG', 'AG', 'CTD', '分级C', '分级A', 'I131', '分级B', '分级D', '131I‐MIBG', 'NYHA', 'IPF', 'DIP', 'Lambert-Eaton', 'Graves', 'IIA期', 'CKD', 'FDA', 'A级', 'B级', 'C级', 'D级', '维生素D']:
string = string.replace(x, '')
pattern = '\(*([A-Z])\)*' pattern = '\(*([A-Z])\)*'
match = re.findall(pattern, string) match = re.findall(pattern, string)
if match: if match:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment