Unverified Commit 689ffe5b authored by Fengzhe Zhou, committed by GitHub

[Feature] Use dataset in local path (#570)

* update commonsenseqa

* update drop

* update flores_first100

* update gsm8k

* update humaneval

* update lambada

* update obqa

* update piqa

* update race

* update siqa

* update story_cloze

* update strategyqa

* update tydiqa

* update winogrande

* update doc

* update hellaswag

* fix obqa

* update collections

* update .zip name
parent d6aaac22
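
All the loaders below share one pattern: read raw files from a caller-supplied local path and build a datasets.Dataset/DatasetDict by hand, instead of pulling from the HuggingFace hub via load_dataset. A minimal usage sketch, assuming the GSM8K loader is importable as opencompass.datasets.gsm8k and that ./data/gsm8k holds the two JSONL files it reads (both assumptions for illustration, not prescribed by this commit):

from opencompass.datasets.gsm8k import GSM8KDataset

# './data/gsm8k' must contain train.jsonl and test.jsonl, one JSON object per line.
ds = GSM8KDataset.load(path='./data/gsm8k')
print(ds)             # DatasetDict with 'train' and 'test' splits
print(ds['test'][0])  # one raw GSM8K record (a dict with 'question'/'answer')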
-from datasets import load_dataset
+import json
+import os
+
+from datasets import Dataset, DatasetDict

 from opencompass.registry import LOAD_DATASET

@@ -9,14 +12,33 @@ from .base import BaseDataset
 class commonsenseqaDataset(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def pre_process(example):
-            for i in range(5):
-                example[chr(ord('A') + i)] = example['choices']['text'][i]
-            return example
-
-        dataset = dataset.map(pre_process).remove_columns(
-            ['question_concept', 'id', 'choices'])
-        return dataset
+    def load(path):
+        dataset = {}
+        for split, stub in [
+            ['train', 'train_rand_split.jsonl'],
+            ['validation', 'dev_rand_split.jsonl'],
+        ]:
+            data_path = os.path.join(path, stub)
+            dataset_list = []
+            with open(data_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = json.loads(line)
+                    dataset_list.append({
+                        'question': line['question']['stem'],
+                        'A': line['question']['choices'][0]['text'],
+                        'B': line['question']['choices'][1]['text'],
+                        'C': line['question']['choices'][2]['text'],
+                        'D': line['question']['choices'][3]['text'],
+                        'E': line['question']['choices'][4]['text'],
+                        'answerKey': line['answerKey'],
+                    })
+            dataset[split] = Dataset.from_list(dataset_list)
+        return DatasetDict(dataset)
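
Judging from the field accesses in the new loader, each input line follows the official CommonsenseQA JSONL layout; a sketch with invented values (only the structure is grounded in the code above):

import json

record = json.loads(
    '{"answerKey": "A",'
    ' "question": {"stem": "Where would you find a shelf of books?",'
    ' "choices": [{"label": "A", "text": "library"},'
    ' {"label": "B", "text": "garage"},'
    ' {"label": "C", "text": "river"},'
    ' {"label": "D", "text": "desert"},'
    ' {"label": "E", "text": "oven"}]}}')
print(record['question']['stem'])                # -> 'question' column
print(record['question']['choices'][0]['text'])  # -> column 'A'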
-from datasets import DatasetDict, load_dataset
+import json
+
+from datasets import Dataset, DatasetDict

 from opencompass.registry import LOAD_DATASET

@@ -9,21 +11,37 @@ from .base import BaseDataset
 class dropDataset(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs, split='validation')
-
-        def pre_process(example):
-            example['answers'] = example['answers_spans']['spans']
-            example['prompt'] = example.pop('passage')
-            return example
-
-        def only_number(example):
-            for i in example['answers_spans']['types']:
-                if i == 'number':
-                    return True
-            return False
-
-        dataset = dataset.filter(only_number)
-        dataset = dataset.map(pre_process).remove_columns(
-            ['section_id', 'query_id'])
-        return DatasetDict({'validation': dataset})
+    def get_answers(validated_answers):
+        answers = []
+        for answer_item in validated_answers:
+            if answer_item['number']:
+                answers.append(answer_item['number'])
+            elif any(answer_item['date'][i] for i in ['day', 'month', 'year']):
+                d = [answer_item['date'][i] for i in ['day', 'month', 'year']]
+                answers.append(' '.join(d).strip())
+            else:
+                for span in answer_item['spans']:
+                    answers.append(span)
+        answers = list(set(answers))
+        return answers
+
+    @staticmethod
+    def load(path, only_number=True):
+        with open(path, 'r', encoding='utf-8') as f:
+            lines = json.load(f)
+        dataset_list = []
+        for line in lines.values():
+            for qa_pair in line['qa_pairs']:
+                validated_answers = qa_pair['validated_answers']
+                if only_number and not any(i['number'] for i in validated_answers):
+                    continue
+                item = {
+                    'prompt': line['passage'],
+                    'question': qa_pair['question'],
+                    'answers': dropDataset.get_answers(validated_answers),
+                }
+                dataset_list.append(item)
+        dataset_list = Dataset.from_list(dataset_list)
+        return DatasetDict({'validation': dataset_list})
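
A quick sketch of what get_answers yields for a hand-built validated_answers list; the number/date/spans entry structure comes from the code above, the values are invented, and the import path opencompass.datasets.drop is an assumption:

from opencompass.datasets.drop import dropDataset

validated_answers = [
    {'number': '3', 'date': {'day': '', 'month': '', 'year': ''}, 'spans': []},
    {'number': '', 'date': {'day': '5', 'month': 'May', 'year': ''}, 'spans': []},
    {'number': '', 'date': {'day': '', 'month': '', 'year': ''}, 'spans': ['three']},
]
# A number wins over a date, which wins over plain spans; set() removes duplicates.
print(dropDataset.get_answers(validated_answers))  # ['3', '5 May', 'three'] in any order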
+import os
 import re

-from datasets import DatasetDict, load_dataset
+from datasets import Dataset, DatasetDict

 from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS

@@ -11,15 +12,30 @@ from .base import BaseDataset
 class FloresFirst100Dataset(BaseDataset):

     @staticmethod
-    def load(name):
-        return DatasetDict({
-            'dev':
-            load_dataset(path='facebook/flores', name=name, split='dev'),
-            'devtest':
-            load_dataset(path='facebook/flores',
-                         name=name,
-                         split='devtest[:100]')
-        })
+    def load_single(src_path, tgt_path, src_lang, tgt_lang):
+        with open(src_path, 'r', encoding='utf-8') as f:
+            src_lines = f.readlines()
+        with open(tgt_path, 'r', encoding='utf-8') as f:
+            tgt_lines = f.readlines()
+        assert len(src_lines) == len(tgt_lines)
+        dataset_list = [{
+            f'sentence_{src_lang}': src_lines[i].strip(),
+            f'sentence_{tgt_lang}': tgt_lines[i].strip(),
+        } for i in range(len(src_lines))]
+        return Dataset.from_list(dataset_list)
+
+    @staticmethod
+    def load(path, name):
+        src_lang, tgt_lang = name.split('-')
+        dev_dataset = FloresFirst100Dataset.load_single(
+            os.path.join(path, 'dev', f'{src_lang}.dev'),
+            os.path.join(path, 'dev', f'{tgt_lang}.dev'), src_lang, tgt_lang)
+        devtest_dataset = FloresFirst100Dataset.load_single(
+            os.path.join(path, 'devtest', f'{src_lang}.devtest'),
+            os.path.join(path, 'devtest', f'{tgt_lang}.devtest'), src_lang,
+            tgt_lang)
+        return DatasetDict({'dev': dev_dataset, 'devtest': devtest_dataset})

 @TEXT_POSTPROCESSORS.register_module('flores')
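
Usage-wise, name keeps the 'src-tgt' form and is split on the hyphen; the files are then expected at dev/{lang}.dev and devtest/{lang}.devtest under path. A sketch, assuming the class is importable from opencompass.datasets.flores and using FLORES-200 style codes purely as an example:

from opencompass.datasets.flores import FloresFirst100Dataset

# Expects ./data/flores_first100/dev/eng_Latn.dev, dev/zho_Hans.dev,
# devtest/eng_Latn.devtest and devtest/zho_Hans.devtest.
ds = FloresFirst100Dataset.load(path='./data/flores_first100',
                                name='eng_Latn-zho_Hans')
print(ds['dev'][0]['sentence_eng_Latn'])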
+import json
+import os
+
+from datasets import Dataset, DatasetDict
+
 from opencompass.openicl import BaseEvaluator
-from opencompass.registry import TEXT_POSTPROCESSORS
+from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class GSM8KDataset(BaseDataset):
+
+    @staticmethod
+    def load(path):
+        datasets = {}
+        for split in ['train', 'test']:
+            split_path = os.path.join(path, split + '.jsonl')
+            dataset = []
+            with open(split_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = json.loads(line.strip())
+                    line['answer']  # raises KeyError early if a record lacks an answer
+                    dataset.append(line)
+            datasets[split] = Dataset.from_list(dataset)
+        return DatasetDict(datasets)

 @TEXT_POSTPROCESSORS.register_module('gsm8k_dataset')
-from datasets import Dataset, load_dataset
+import json
+
+from datasets import Dataset

 from opencompass.registry import LOAD_DATASET

@@ -11,15 +11,20 @@ from .base import BaseDataset
 class hellaswagDataset(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def preprocess(example):
-            for i in range(4):
-                example[chr(ord('A') + i)] = example['endings'][i]
-            return example
-
-        dataset = dataset.map(preprocess).remove_columns(['endings'])
+    def load(path):
+        dataset = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                data = json.loads(line)
+                dataset.append({
+                    'ctx': data['query'].split(': ', 2)[-1],
+                    'A': data['choices'][0],
+                    'B': data['choices'][1],
+                    'C': data['choices'][2],
+                    'D': data['choices'][3],
+                    'label': data['gold'],
+                })
+        dataset = Dataset.from_list(dataset)
         return dataset

@@ -27,19 +32,20 @@ class hellaswagDataset_V2(BaseDataset):
 class hellaswagDataset_V2(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def preprocess(example):
-            for i in range(4):
-                example[chr(ord('A') + i)] = example['endings'][i]
-            if example['label']:
-                example['label'] = 'ABCD'[int(example['label'])]
-            else:
-                example['label'] = 'NULL'
-            return example
-
-        dataset = dataset.map(preprocess).remove_columns(['endings'])
+    def load(path):
+        dataset = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                data = json.loads(line)
+                dataset.append({
+                    'ctx': data['query'].split(': ', 1)[-1],
+                    'A': data['choices'][0],
+                    'B': data['choices'][1],
+                    'C': data['choices'][2],
+                    'D': data['choices'][3],
+                    'label': 'ABCD'[data['gold']],
+                })
+        dataset = Dataset.from_list(dataset)
         return dataset
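
The two hellaswag loaders differ only in the maxsplit applied to query and in whether gold is mapped to a letter ('ABCD'[3] == 'D'). Assuming queries look like 'Activity label: <topic>: <ctx>' (an assumption about the preprocessed file, not stated in the diff), the splits behave like this:

query = 'Activity label: Removing ice from car: Then, the man writes over the windshield.'
print(query.split(': ', 2)[-1])  # V1 ctx: 'Then, the man writes over the windshield.'
print(query.split(': ', 1)[-1])  # V2 ctx: 'Removing ice from car: Then, the man ...'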
+import json
 import os.path as osp
 import re
 import tempfile
 from typing import List

+from datasets import Dataset
+
 from opencompass.openicl.icl_evaluator import BaseEvaluator
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class HumanevalDataset(BaseDataset):
+
+    @staticmethod
+    def load(path):
+        dataset = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                dataset.append(json.loads(line.strip()))
+        return Dataset.from_list(dataset)

 class HumanEvaluator(BaseEvaluator):
+import json
 import re
 import string

-from datasets import DatasetDict, load_dataset
+from datasets import Dataset, DatasetDict

 from opencompass.openicl.icl_evaluator import BaseEvaluator
 from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET

@@ -14,16 +15,12 @@ from .base import BaseDataset
 class lambadaDataset(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs, split='test')
-
-        def preprocess(example):
-            prompt, target = example['text'].strip().rsplit(' ', 1)
-            example['prompt'] = prompt
-            example['label'] = target
-            return example
-
-        dataset = dataset.map(preprocess)
+    def load(path):
+        dataset = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                dataset.append(json.loads(line))
+        dataset = Dataset.from_list(dataset)
         return DatasetDict({'test': dataset})
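
The old lambada loader derived prompt/label by splitting 'text' on its final word; the new one appends records verbatim, so the local JSONL presumably already materializes those fields. An illustrative record (invented values, assumed layout):

import json

rec = json.loads('{"prompt": "He glanced nervously at his", "label": "watch"}')
assert {'prompt', 'label'} <= set(rec)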
-from datasets import load_dataset
+import json
+
+from datasets import Dataset

 from opencompass.registry import LOAD_DATASET

@@ -9,33 +11,46 @@ from .base import BaseDataset
 class OBQADataset(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def pre_process(example):
-            for i in range(4):
-                example[chr(ord('A') + i)] = example['choices']['text'][i]
-            return example
-
-        dataset = dataset.map(pre_process).remove_columns(['id', 'choices'])
-        return dataset
+    def load(path):
+        dataset_list = []
+        with open(path, 'r') as f:
+            for line in f:
+                line = json.loads(line)
+                item = {
+                    'A': line['question']['choices'][0]['text'],
+                    'B': line['question']['choices'][1]['text'],
+                    'C': line['question']['choices'][2]['text'],
+                    'D': line['question']['choices'][3]['text'],
+                    'question_stem': line['question']['stem'],
+                    'answerKey': line['answerKey'],
+                }
+                if 'fact1' in line:
+                    item['fact1'] = line['fact1']
+                dataset_list.append(item)
+        return Dataset.from_list(dataset_list)

 @LOAD_DATASET.register_module()
 class OBQADataset_V2(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def pre_process(example):
-            example['A'] = example['choices']['text'][0]
-            example['B'] = example['choices']['text'][1]
-            example['C'] = example['choices']['text'][2]
-            example['D'] = example['choices']['text'][3]
-            if not example['question_stem'].endswith('?'):
-                example['question_stem'] += ' what?'
-            return example
-
-        dataset = dataset.map(pre_process).remove_columns(['id', 'choices'])
-        return dataset
+    def load(path):
+        dataset_list = []
+        with open(path, 'r') as f:
+            for line in f:
+                line = json.loads(line)
+                question = line['question']['stem']
+                if not question.endswith('?'):
+                    question += ' what?'
+                item = {
+                    'A': line['question']['choices'][0]['text'],
+                    'B': line['question']['choices'][1]['text'],
+                    'C': line['question']['choices'][2]['text'],
+                    'D': line['question']['choices'][3]['text'],
+                    'question_stem': question,
+                    'answerKey': line['answerKey'],
+                }
+                if 'fact1' in line:
+                    item['fact1'] = line['fact1']
+                dataset_list.append(item)
+        return Dataset.from_list(dataset_list)
-from datasets import load_dataset
+import json
+import os
+
+from datasets import Dataset, DatasetDict

 from opencompass.registry import LOAD_DATASET

@@ -6,45 +9,100 @@ from .base import BaseDataset
 @LOAD_DATASET.register_module()
-class piqaDataset_V2(BaseDataset):
+class piqaDataset(BaseDataset):
+
+    @staticmethod
+    def load_single(path, data_filename, label_filename):
+        data_path = os.path.join(path, data_filename)
+        label_path = os.path.join(path, label_filename)
+        dataset = []
+        with open(data_path, 'r', encoding='utf-8') as f:
+            data_lines = f.readlines()
+        with open(label_path, 'r', encoding='utf-8') as f:
+            label_lines = f.readlines()
+        assert len(data_lines) == len(label_lines)
+        for data, label in zip(data_lines, label_lines):
+            i = json.loads(data.strip())
+            i['label'] = int(label.strip())
+            dataset.append(i)
+        return Dataset.from_list(dataset)

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def preprocess(example):
-            assert isinstance(example['label'], int)
-            if example['label'] < 0:
-                example['answer'] = 'NULL'
-            else:
-                example['answer'] = 'AB'[example['label']]
-            example.pop('label')
-            return example
-
-        dataset = dataset.map(preprocess)
-        return dataset
+    def load(path):
+        train_dataset = piqaDataset.load_single(path, 'train.jsonl',
+                                                'train-labels.lst')
+        val_dataset = piqaDataset.load_single(path, 'dev.jsonl',
+                                              'dev-labels.lst')
+        return DatasetDict({'train': train_dataset, 'validation': val_dataset})
+
+
+@LOAD_DATASET.register_module()
+class piqaDataset_V2(BaseDataset):
+
+    @staticmethod
+    def load_single(path, data_filename, label_filename):
+        data_path = os.path.join(path, data_filename)
+        label_path = os.path.join(path, label_filename)
+        dataset = []
+        with open(data_path, 'r', encoding='utf-8') as f:
+            data_lines = f.readlines()
+        with open(label_path, 'r', encoding='utf-8') as f:
+            label_lines = f.readlines()
+        assert len(data_lines) == len(label_lines)
+        for data, label in zip(data_lines, label_lines):
+            i = json.loads(data.strip())
+            label = int(label.strip())
+            if label < 0:
+                i['answer'] = 'NULL'
+            else:
+                i['answer'] = 'AB'[label]
+            dataset.append(i)
+        return Dataset.from_list(dataset)
+
+    @staticmethod
+    def load(path):
+        train_dataset = piqaDataset_V2.load_single(path, 'train.jsonl',
+                                                   'train-labels.lst')
+        val_dataset = piqaDataset_V2.load_single(path, 'dev.jsonl',
+                                                 'dev-labels.lst')
+        return DatasetDict({'train': train_dataset, 'validation': val_dataset})

 @LOAD_DATASET.register_module()
 class piqaDataset_V3(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def preprocess(example):
-            example['goal'] = example['goal'][0].upper() + example['goal'][1:]
-            if example['goal'].endswith('?') or example['goal'].endswith('.'):
-                example['sol1'] = example['sol1'][0].upper() + example['sol1'][1:]
-                example['sol2'] = example['sol2'][0].upper() + example['sol2'][1:]
-            else:
-                example['sol1'] = example['sol1'][0].lower() + example['sol1'][1:]
-                example['sol2'] = example['sol2'][0].lower() + example['sol2'][1:]
-            return example
-
-        dataset = dataset.map(preprocess)
-        return dataset
+    def load_single(path, data_filename, label_filename):
+        data_path = os.path.join(path, data_filename)
+        label_path = os.path.join(path, label_filename)
+        dataset = []
+        with open(data_path, 'r', encoding='utf-8') as f:
+            data_lines = f.readlines()
+        with open(label_path, 'r', encoding='utf-8') as f:
+            label_lines = f.readlines()
+        assert len(data_lines) == len(label_lines)
+        for data, label in zip(data_lines, label_lines):
+            i = json.loads(data.strip())
+            i['label'] = int(label.strip())
+            # some preprocessing
+            i['goal'] = i['goal'][0].upper() + i['goal'][1:]
+            if i['goal'].endswith('?') or i['goal'].endswith('.'):
+                i['sol1'] = i['sol1'][0].upper() + i['sol1'][1:]
+                i['sol2'] = i['sol2'][0].upper() + i['sol2'][1:]
+            else:
+                i['sol1'] = i['sol1'][0].lower() + i['sol1'][1:]
+                i['sol2'] = i['sol2'][0].lower() + i['sol2'][1:]
+            dataset.append(i)
+        return Dataset.from_list(dataset)
+
+    @staticmethod
+    def load(path):
+        train_dataset = piqaDataset_V3.load_single(path, 'train.jsonl',
+                                                   'train-labels.lst')
+        val_dataset = piqaDataset_V3.load_single(path, 'dev.jsonl',
+                                                 'dev-labels.lst')
+        return DatasetDict({'train': train_dataset, 'validation': val_dataset})
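
Each line of dev.jsonl pairs positionally with one line of dev-labels.lst. A toy sketch of the V2 mapping (field values invented; the logic mirrors load_single above):

import json

data_line = '{"goal": "open a jar", "sol1": "twist the lid", "sol2": "push the lid"}'
label_line = '0'

i = json.loads(data_line)
label = int(label_line.strip())
i['answer'] = 'NULL' if label < 0 else 'AB'[label]
print(i['answer'])  # 'A' -- sol1 is correct; a -1 label would give 'NULL'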
-from datasets import load_dataset
+import json
+import os
+
+from datasets import Dataset, DatasetDict

 from opencompass.registry import LOAD_DATASET

@@ -10,12 +13,21 @@ class RaceDataset(BaseDataset):

     @staticmethod
     def load(path: str, name: str):
-        dataset = load_dataset(path, name)
-
-        def preprocess(x):
-            for ans, option in zip(['A', 'B', 'C', 'D'], x['options']):
-                x[ans] = option
-            del x['options']
-            return x
-
-        return dataset.map(preprocess)
+        dataset = {}
+        for split in ['validation', 'test']:
+            jsonl_path = os.path.join(path, split, f'{name}.jsonl')
+            dataset_list = []
+            with open(jsonl_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = json.loads(line)
+                    dataset_list.append({
+                        'article': line['article'],
+                        'question': line['question'],
+                        'A': line['options'][0],
+                        'B': line['options'][1],
+                        'C': line['options'][2],
+                        'D': line['options'][3],
+                        'answer': line['answer'],
+                    })
+            dataset[split] = Dataset.from_list(dataset_list)
+        return DatasetDict(dataset)
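
Here path is the dataset root and name picks the subset file, giving the layout <path>/<split>/<name>.jsonl. A usage sketch, assuming the class is importable from opencompass.datasets.race and that the subsets are RACE's usual 'middle' and 'high' (neither is spelled out in this diff):

from opencompass.datasets.race import RaceDataset

# Reads ./data/race/validation/high.jsonl and ./data/race/test/high.jsonl.
ds = RaceDataset.load(path='./data/race', name='high')
print(ds['validation'][0]['question'])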
-from datasets import load_dataset
+import json
+import os
+
+from datasets import Dataset, DatasetDict

 from opencompass.registry import LOAD_DATASET

@@ -6,24 +9,72 @@ from .base import BaseDataset
 @LOAD_DATASET.register_module()
-class siqaDataset_V2(BaseDataset):
+class siqaDataset(BaseDataset):
+    """Disconnect from HuggingFace version of HFDataset."""

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def preprocess(example):
-            example['all_labels'] = {
-                'candidates': [
-                    f'A. {example["answerA"]}',
-                    f'B. {example["answerB"]}',
-                    f'C. {example["answerC"]}',
-                ],
-                'label':
-                int(example['label']) - 1
-            }
-            example['label'] = ' ABC'[int(example['label'])]
-            return example
-
-        dataset = dataset.map(preprocess)
-        return dataset
+    def load_single(path, data_filename, label_filename):
+        data_path = os.path.join(path, data_filename)
+        label_path = os.path.join(path, label_filename)
+        dataset = []
+        with open(data_path, 'r', encoding='utf-8') as f:
+            data_lines = f.readlines()
+        with open(label_path, 'r', encoding='utf-8') as f:
+            label_lines = f.readlines()
+        assert len(data_lines) == len(label_lines)
+        for data, label in zip(data_lines, label_lines):
+            i = json.loads(data.strip())
+            i['label'] = int(label.strip())
+            dataset.append(i)
+        return Dataset.from_list(dataset)
+
+    @staticmethod
+    def load(path):
+        train_dataset = siqaDataset.load_single(path, 'train.jsonl',
+                                                'train-labels.lst')
+        val_dataset = siqaDataset.load_single(path, 'dev.jsonl',
+                                              'dev-labels.lst')
+        return DatasetDict({'train': train_dataset, 'validation': val_dataset})
+
+
+@LOAD_DATASET.register_module()
+class siqaDataset_V2(BaseDataset):
+    """Disconnect from HuggingFace version of siqaDataset_V2."""
+
+    @staticmethod
+    def load_single(path, data_filename, label_filename):
+        data_path = os.path.join(path, data_filename)
+        label_path = os.path.join(path, label_filename)
+        dataset = []
+        with open(data_path, 'r', encoding='utf-8') as f:
+            data_lines = f.readlines()
+        with open(label_path, 'r', encoding='utf-8') as f:
+            label_lines = f.readlines()
+        assert len(data_lines) == len(label_lines)
+        for data, label in zip(data_lines, label_lines):
+            i = json.loads(data.strip())
+            label = int(label.strip())
+            # some preprocessing
+            i['all_labels'] = {
+                'candidates': [
+                    [f'A. {i["answerA"]}', 'A', i['answerA']],
+                    [f'B. {i["answerB"]}', 'B', i['answerB']],
+                    [f'C. {i["answerC"]}', 'C', i['answerC']],
+                ],
+                'label': label - 1
+            }
+            i['label'] = ' ABC'[label]
+            dataset.append(i)
+        return Dataset.from_list(dataset)
+
+    @staticmethod
+    def load(path):
+        train_dataset = siqaDataset_V2.load_single(path, 'train.jsonl',
+                                                   'train-labels.lst')
+        val_dataset = siqaDataset_V2.load_single(path, 'dev.jsonl',
+                                                 'dev-labels.lst')
+        return DatasetDict({'train': train_dataset, 'validation': val_dataset})
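
Besides moving to local files, V2 also changes the candidate format: each entry in all_labels['candidates'] is now a three-element list instead of a single formatted string, so downstream templates can pick whichever form they need. A tiny illustration (answer text invented):

answerA = 'go back to sleep'
candidate = [f'A. {answerA}', 'A', answerA]
# The old format kept only candidate[0]; the list adds the bare letter
# and the raw answer text.
print(candidate)  # ['A. go back to sleep', 'A', 'go back to sleep']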
-from datasets import DatasetDict, load_dataset
+import json
+import os
+
+from datasets import Dataset, DatasetDict

 from opencompass.registry import LOAD_DATASET

@@ -9,38 +12,39 @@ from .base import BaseDataset
 class storyclozeDataset(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        # special process
-        dataset = load_dataset(**kwargs, split='train+eval')
-
-        def preprocess(example):
-            example['context'] = ' '.join([
-                example['input_sentence_1'], example['input_sentence_2'],
-                example['input_sentence_3'], example['input_sentence_4']
-            ])
-            return example
-
-        dataset = dataset.map(preprocess)
-        return DatasetDict({'test': dataset})
+    def load(path, lang):
+        dataset_list = []
+        for split in ['train', 'eval']:
+            split_path = os.path.join(path, f'{lang}_{split}.jsonl')
+            with open(split_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = json.loads(line)
+                    line['context'] = ' '.join([
+                        line['input_sentence_1'], line['input_sentence_2'],
+                        line['input_sentence_3'], line['input_sentence_4']
+                    ])
+                    dataset_list.append(line)
+        dataset_list = Dataset.from_list(dataset_list)
+        return DatasetDict({'test': dataset_list})

 @LOAD_DATASET.register_module()
 class storyclozeDataset_V2(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        # special process
-        dataset = load_dataset(**kwargs, split='train+eval')
-
-        def preprocess(example):
-            example['context'] = ' '.join([
-                example['input_sentence_1'], example['input_sentence_2'],
-                example['input_sentence_3'], example['input_sentence_4']
-            ])
-            example['answer_right_ending'] = ' AB'[
-                example['answer_right_ending']]
-            return example
-
-        dataset = dataset.map(preprocess)
-        return dataset
+    def load(path, lang):
+        dataset_list = []
+        for split in ['train', 'eval']:
+            split_path = os.path.join(path, f'{lang}_{split}.jsonl')
+            with open(split_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = json.loads(line)
+                    line['context'] = ' '.join([
+                        line['input_sentence_1'], line['input_sentence_2'],
+                        line['input_sentence_3'], line['input_sentence_4']
+                    ])
+                    line['answer_right_ending'] = ' AB'[
+                        line['answer_right_ending']]
+                    dataset_list.append(line)
+        dataset_list = Dataset.from_list(dataset_list)
+        return dataset_list
+import json
 import re

-from opencompass.registry import TEXT_POSTPROCESSORS
+from datasets import Dataset
+
+from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
+
+from .base import BaseDataset

 @TEXT_POSTPROCESSORS.register_module('strategyqa')
@@ -16,3 +21,13 @@ def strategyqa_pred_postprocess(text: str) -> str:
 @TEXT_POSTPROCESSORS.register_module('strategyqa_dataset')
 def strategyqa_dataset_postprocess(text: str) -> str:
     return 'yes' if str(text) == 'True' else 'no'
+
+
+@LOAD_DATASET.register_module()
+class StrategyQADataset(BaseDataset):
+
+    @staticmethod
+    def load(path):
+        with open(path, 'r', encoding='utf-8') as f:
+            dataset = json.load(f)
+        return Dataset.from_list(dataset)
+import json
+import os
 import re
 from collections import Counter

-from datasets import load_dataset
+from datasets import Dataset

 from opencompass.openicl.icl_evaluator import BaseEvaluator
 from opencompass.utils.text_postprocessors import general_postprocess

@@ -12,15 +14,16 @@ from .base import BaseDataset
 class TydiQADataset(BaseDataset):

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def pre_process(example):
-            example['answer'] = example['answers']['text']
-            return example
-
-        dataset = dataset.map(pre_process).remove_columns(['id', 'answers'])
-        return dataset
+    def load(path, lang):
+        path = os.path.join(path, 'dev', f'{lang}-dev.jsonl')
+        dataset_list = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                line = json.loads(line)
+                answer = list(set([i['text'] for i in line['answers']]))
+                line['answer'] = answer
+                dataset_list.append(line)
+        return Dataset.from_list(dataset_list)

 class TydiQAEvaluator(BaseEvaluator):
-from datasets import load_dataset
+import json
+import os
+
+from datasets import Dataset

 from opencompass.registry import LOAD_DATASET

@@ -7,38 +10,49 @@ from .base import BaseDataset
 @LOAD_DATASET.register_module()
 class winograndeDataset(BaseDataset):
+    """Disconnect from Huggingface, winograndeDataset."""

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def preprocess(example):
-            prompt = example.pop('sentence')
-            example['opt1'] = prompt.replace('_', example.pop('option1'))
-            example['opt2'] = prompt.replace('_', example.pop('option2'))
-            return example
-
-        return dataset.map(preprocess)
+    def load(path):
+        path = os.path.join(path, 'dev.jsonl')
+        dataset_list = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                line = json.loads(line)
+                prompt = line['sentence']
+                dataset_list.append({
+                    'opt1': prompt.replace('_', line['option1']),
+                    'opt2': prompt.replace('_', line['option2']),
+                    'answer': line['answer']
+                })
+        dataset_list = Dataset.from_list(dataset_list)
+        return dataset_list

 @LOAD_DATASET.register_module()
 class winograndeDataset_V2(BaseDataset):
+    """Disconnect from Huggingface, winograndeDataset_V2."""

     @staticmethod
-    def load(**kwargs):
-        dataset = load_dataset(**kwargs)
-
-        def preprocess(example):
-            prompt = example.pop('sentence')
-            example['opt1'] = prompt.replace('_', example.pop('option1'))
-            example['opt2'] = prompt.replace('_', example.pop('option2'))
-            answer = example.pop('answer')
-            if answer == '':
-                example['label'] = 'NULL'
-            else:
-                example['label'] = ' AB'[int(answer)]
-            return example
-
-        return dataset.map(preprocess)
+    def load(path):
+        path = os.path.join(path, 'dev.jsonl')
+        dataset_list = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                line = json.loads(line)
+                prompt = line['sentence']
+                answer = line['answer']
+                answer = ' AB'[int(answer)] if answer != '' else 'NULL'
+                dataset_list.append({
+                    'opt1': prompt.replace('_', line['option1']),
+                    'opt2': prompt.replace('_', line['option2']),
+                    'answer': answer
+                })
+        dataset_list = Dataset.from_list(dataset_list)
+        return dataset_list
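
The '_' substitution is the heart of both winogrande loaders; a sketch on a stock Winograd-style sentence (record values invented, field names from the loader):

line = {
    'sentence': 'The trophy does not fit in the suitcase because _ is too small.',
    'option1': 'the trophy',
    'option2': 'the suitcase',
    'answer': '2',
}
print(line['sentence'].replace('_', line['option2']))  # the opt2 reading
print(' AB'[int(line['answer'])])  # 'B' -- V2 mapping; '' maps to 'NULL'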