Unverified commit 2d4e5597, authored by bittersweet1999 and committed by GitHub
Browse files

[Feature] Add multi-model judge and fix some problems (#1016)

* support multi-model judge and moe judge

* test_moe

* test_moe

* test

* add moe judge

* support multi-judge-model
parent c220550f
......@@ -46,3 +46,25 @@ def get_infer_output_path(model_cfg: ConfigDict,
model_abbr = model_abbr_from_cfg(model_cfg)
dataset_abbr = dataset_abbr_from_cfg(dataset_cfg)
return osp.join(root_path, model_abbr, f'{dataset_abbr}.{file_extension}')
def deal_with_judge_model_abbr(model_cfg, judge_model_cfg, meta: bool = False):
    """Append a marker entry naming the judge model to the model config(s).

    Args:
        model_cfg: A single ``ConfigDict`` or a sequence of model configs.
            Every entry is read through its ``'abbr'`` key.
        judge_model_cfg: Config of the judge model; its abbreviation is
            obtained through ``model_abbr_from_cfg``.
        meta: When true the marker prefix is ``'summarized-by--'``,
            otherwise it is ``'judged-by--'``.

    Returns:
        The model configs followed by one extra
        ``{'abbr': prefix + judge_abbr}`` entry. A single ``ConfigDict`` is
        first wrapped into a 1-tuple. If any entry's ``'abbr'`` already
        contains the prefix, the configs are returned without a new entry.

    Raises:
        KeyError: If an inspected entry has no ``'abbr'`` key.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    prefix = 'summarized-by--' if meta else 'judged-by--'

    if isinstance(model_cfg, ConfigDict):
        model_cfg = (model_cfg, )

    # Idempotence guard: never stack a second marker of the same kind.
    if any(prefix in m_cfg['abbr'] for m_cfg in model_cfg):
        return model_cfg

    marker = {'abbr': prefix + model_abbr_from_cfg(judge_model_cfg)}

    # ``+=`` extends a list in place, which would silently modify the
    # caller's sequence. Extend a shallow copy instead; for a tuple the copy
    # is free and ``+=`` builds a new tuple, so the container type is kept.
    extended = copy.copy(model_cfg)
    extended += (marker, )
    return extended
......@@ -341,6 +341,13 @@ def main():
if args.dry_run:
return
runner = RUNNERS.build(cfg.eval.runner)
# For meta-review-judge in subjective evaluation
if isinstance(tasks, list) and len(tasks) != 0 and isinstance(
tasks[0], list):
for task_part in tasks:
runner(task_part)
else:
runner(tasks)
# visualize
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment