[Feature] Add multi-model judge and fix some problems (#1016)

* support multi-model judge and moe judge
* test_moe
* test_moe
* test
* add moe judge
* support multi-judge-model

[Feature] Add multi-model judge and fix some problems (#1016)
* support multi-model judge and moe judge * test_moe * test_moe * test * add moe judge * support multi-judge-model
2d4e5597 · bittersweet1999 · GitHub · c220550f · 2d4e5597 · 2d4e5597
Unverified Commit 2d4e5597 authored Apr 02, 2024 by bittersweet1999 Committed by GitHub Apr 02, 2024
Show whitespace changes
Inline Side-by-side

Showing with 30 additions and 1 deletion

opencompass/utils/abbr.py opencompass/utils/abbr.py +22 -0

run.py run.py +8 -1

No files found.
--- a/opencompass/utils/abbr.py
+++ b/opencompass/utils/abbr.py
@@ -46,3 +46,25 @@ def get_infer_output_path(model_cfg: ConfigDict,
    model_abbr = model_abbr_from_cfg(model_cfg)
    dataset_abbr = dataset_abbr_from_cfg(dataset_cfg)
    return osp.join(root_path, model_abbr, f'{dataset_abbr}.{file_extension}')
+
+
+def deal_with_judge_model_abbr(model_cfg, judge_model_cfg, meta=False):
+    if isinstance(model_cfg, ConfigDict):
+        model_cfg = (model_cfg, )
+    if meta:
+        for m_cfg in model_cfg:
+            if 'summarized-by--' in m_cfg['abbr']:
+                return model_cfg
+        model_cfg += ({
+            'abbr':
+            'summarized-by--' + model_abbr_from_cfg(judge_model_cfg)
+        }, )
+    else:
+        for m_cfg in model_cfg:
+            if 'judged-by--' in m_cfg['abbr']:
+                return model_cfg
+        model_cfg += ({
+            'abbr':
+            'judged-by--' + model_abbr_from_cfg(judge_model_cfg)
+        }, )
+    return model_cfg
--- a/run.py
+++ b/run.py
@@ -341,6 +341,13 @@ def main():
        if args.dry_run:
            return
        runner = RUNNERS.build(cfg.eval.runner)
+
+        # For meta-review-judge in subjective evaluation
+        if isinstance(tasks, list) and len(tasks) != 0 and isinstance(
+                tasks[0], list):
+            for task_part in tasks:
+                runner(task_part)
+        else:
-        runner(tasks)
+            runner(tasks)

    # visualize