[Fix] Fix AGIEval multiple choice (#137)

* update agieval data
* rename variables

[Fix] Fix AGIEval multiple choice (#137)
* update agieval data
* rename variables
876ade71 · Leymore · GitHub · 0555d59a · 876ade71 · 876ade71
Unverified commit 876ade71, authored Aug 10, 2023 by Leymore; committed by GitHub on Aug 10, 2023
4 changed files
--- a/configs/datasets/agieval/agieval_gen_0a9ace.py
+++ b/configs/datasets/agieval/agieval_gen_0a9ace.py
@@ -15,7 +15,6 @@ agieval_single_choice_sets = [
    'gaokao-history',
    'gaokao-biology',
    'gaokao-chemistry',
-    'gaokao-physics',
    'gaokao-mathqa',
    'logiqa-zh',
    'lsat-ar',
@@ -28,8 +27,9 @@ agieval_single_choice_sets = [
    'aqua-rat',
 ]
 agieval_multiple_choices_sets = [
-    # 'jec-qa-kd',  # 数据需要额外处理
-    # 'jec-qa-ca',  # 数据需要额外处理
+    'gaokao-physics',
+    'jec-qa-kd',
+    'jec-qa-ca',
 ]
 agieval_cloze_sets = ['gaokao-mathcloze', 'math']

@@ -58,7 +58,7 @@ for name in agieval_single_choice_sets:
            infer_cfg=agieval_infer_cfg.copy(),
            eval_cfg=agieval_eval_cfg.copy()))

-for name in agieval_cloze_sets:
+for name in agieval_multiple_choices_sets + agieval_cloze_sets:
    agieval_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,

--- a/configs/datasets/agieval/agieval_gen_397d81.py
+++ b/configs/datasets/agieval/agieval_gen_397d81.py
@@ -15,7 +15,6 @@ agieval_single_choice_sets = [
    'gaokao-history',
    'gaokao-biology',
    'gaokao-chemistry',
-    'gaokao-physics',
    'gaokao-mathqa',
    'logiqa-zh',
    'lsat-ar',
@@ -28,6 +27,7 @@ agieval_single_choice_sets = [
    'aqua-rat',
 ]
 agieval_multiple_choices_sets = [
+    'gaokao-physics',
    'jec-qa-kd',
    'jec-qa-ca',
 ]

--- a/configs/datasets/agieval/agieval_mixed_2f14ad.py
+++ b/configs/datasets/agieval/agieval_mixed_2f14ad.py
@@ -12,7 +12,6 @@ agieval_single_choice_sets = [
    'gaokao-history',
    'gaokao-biology',
    'gaokao-chemistry',
-    'gaokao-physics',
    'gaokao-mathqa',
    'logiqa-zh',
    'lsat-ar',
@@ -25,6 +24,7 @@ agieval_single_choice_sets = [
    'aqua-rat',
 ]
 agieval_multiple_choices_sets = [
+    'gaokao-physics',
    'jec-qa-kd',
    'jec-qa-ca',
 ]

--- a/opencompass/datasets/agieval/agieval.py
+++ b/opencompass/datasets/agieval/agieval.py
@@ -40,16 +40,22 @@ class AGIEvalDataset_v2(BaseDataset):
        assert setting_name in 'zero-shot', 'only support zero-shot setting'
        filename = osp.join(path, name + '.jsonl')
        with open(filename, encoding='utf-8') as f:
-            _data = [json.loads(line.strip()) for line in f]
-        data = []
-        for _d in _data:
-            passage = _d['passage'] if _d['passage'] else ''
-            question = passage + _d['question']
-            options = '\n'.join(_d['options']) if _d['options'] else ''
-            label = _d['label'] if _d['label'] else _d['answer']
+            data = [json.loads(line.strip()) for line in f]
+        dataset = []
+        for item in data:
+            passage = item['passage'] if item['passage'] else ''
+            question = passage + item['question']
+            options = '\n'.join(item['options']) if item['options'] else ''
+            if item['label']:
+                if isinstance(item['label'], list):
+                    label = ''.join(item['label'])
+                else:
+                    label = item['label']
+            else:
+                label = item['answer']
            d = {'question': question, 'options': options, 'label': label}
-            data.append(d)
-        dataset = Dataset.from_list(data)
+            dataset.append(d)
+        dataset = Dataset.from_list(dataset)
        return dataset