Unverified commit dbb20b82, authored by Fengzhe Zhou, committed by GitHub

[Sync] update (#517)

parent 6f07af30
@@ -13,9 +13,9 @@ class CMBDataset(BaseDataset):
@staticmethod
def load(path: str):
with open(osp.join(path, 'test.json'), 'r') as f:
with open(osp.join(path, 'test.json'), 'r', encoding='utf-8') as f:
test_data = json.load(f)
with open(osp.join(path, 'val.json'), 'r') as f:
with open(osp.join(path, 'val.json'), 'r', encoding='utf-8') as f:
val_data = json.load(f)
for da in test_data:
......
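Most of the dataset-loader hunks in this commit make the same one-line change: every `open()` call gains an explicit `encoding='utf-8'`, so JSON/JSONL files containing Chinese text load identically on every platform instead of depending on the locale's default encoding. A minimal sketch of the pattern (the path is illustrative):

```python
import json

# Without an explicit encoding, open() falls back to locale.getpreferredencoding(),
# which is often not UTF-8 on Windows and can garble Chinese benchmark data.
with open('data/CMB/test.json', 'r', encoding='utf-8') as f:  # illustrative path
    test_data = json.load(f)
```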
@@ -13,7 +13,7 @@ class cmnliDataset_V2(BaseDataset):
@staticmethod
def load(path):
data = []
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
if line['label'] == '-':
......
@@ -12,7 +12,7 @@ class CMRCDataset(BaseDataset):
@staticmethod
def load(path: str):
with open(path) as f:
with open(path, 'r', encoding='utf-8') as f:
data = json.load(f)
# Convert the raw data into the required format
rows = []
......
@@ -13,7 +13,7 @@ class COPADataset_V2(BaseDataset):
@staticmethod
def load(path):
dataset = []
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
line['label'] = 'AB'[line['label']]
......
@@ -31,7 +31,7 @@ class CslDataset_V2(BaseDataset):
@staticmethod
def load(path):
data = []
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
item = {
......
@@ -12,7 +12,7 @@ class DRCDDataset(BaseDataset):
@staticmethod
def load(path: str):
with open(path) as f:
with open(path, 'r', encoding='utf-8') as f:
data = json.load(f)
# Convert the raw data into the required format
rows = []
......
@@ -13,7 +13,7 @@ class eprstmtDataset_V2(BaseDataset):
@staticmethod
def load(path):
data = []
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
item = {
......
from opencompass.openicl import BaseEvaluator
from opencompass.registry import TEXT_POSTPROCESSORS
@@ -26,3 +27,25 @@ def gsm8k_postprocess(text: str) -> str:
if ret[i].isdigit():
ret1 += ret[i]
return ret1
class Gsm8kEvaluator(BaseEvaluator):
def score(self, predictions, references):
if len(predictions) != len(references):
return {
'error': 'predictions and references have different '
'length'
}
correct = 0
count = 0
details = []
for i, j in zip(predictions, references):
detail = {'pred': i, 'answers': j, 'correct': False}
count += 1
if i == j:
correct += 1
detail['correct'] = True
details.append(detail)
result = {'accuracy': 100 * correct / count, 'details': details}
return result
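The new `Gsm8kEvaluator` scores exact matches and, alongside the accuracy, returns a per-sample `details` list. A toy usage sketch, assuming opencompass is installed (the import path is an assumption; in practice the evaluator is wired in through a dataset config rather than called directly):

```python
from opencompass.datasets.gsm8k import Gsm8kEvaluator  # import path assumed

evaluator = Gsm8kEvaluator()
result = evaluator.score(predictions=['72', '10', '5'],
                         references=['72', '8', '5'])
print(result['accuracy'])    # 66.66...
print(result['details'][1])  # {'pred': '10', 'answers': '8', 'correct': False}
```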
@@ -49,7 +49,7 @@ class hellaswagDataset_V3(BaseDataset):
@staticmethod
def load(path):
dataset = []
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
for line in f:
data = json.loads(line)
dataset.append({
......
@@ -148,11 +148,15 @@ class MATHEvaluator(BaseEvaluator):
}
correct = 0
count = 0
details = []
for i, j in zip(predictions, references):
detail = {'pred': i, 'answer': j, 'correct': False}
count += 1
if self.is_equiv(i, j):
correct += 1
result = {'accuracy': 100 * correct / count}
detail['correct'] = True
details.append(detail)
result = {'accuracy': 100 * correct / count, 'details': details}
return result
def _fix_fracs(self, string):
......
@@ -52,9 +52,14 @@ class NQEvaluator(BaseEvaluator):
processed_answers = [[general_postprocess(j).lower() for j in i]
for i in references]
details = []
cnt = 0
for pred, cand_ans in zip(processed_predictions, processed_answers):
detail = {'pred': pred, 'answer': cand_ans, 'correct': False}
cnt += int(any([cand == pred for cand in cand_ans]))
if int(any([cand == pred for cand in cand_ans])):
detail['correct'] = True
details.append(detail)
score = cnt / len(predictions) * 100
return {'score': score}
return {'score': score, 'details': details}
@@ -67,7 +67,7 @@ class TNewsDataset_V2(BaseDataset):
}
data = []
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line)
item = {
......
@@ -51,9 +51,14 @@ class TriviaQAEvaluator(BaseEvaluator):
processed_answers = [[general_postprocess(j).lower() for j in i]
for i in references]
details = []
cnt = 0
for pred, cand_ans in zip(processed_predictions, processed_answers):
detail = {'pred': pred, 'answer': cand_ans, 'correct': False}
cnt += int(any([cand == pred for cand in cand_ans]))
if int(any([cand == pred for cand in cand_ans])):
detail['correct'] = True
details.append(detail)
score = cnt / len(predictions) * 100
return {'score': score}
return {'score': score, 'details': details}
@@ -82,6 +82,20 @@ def strategyqa_pred_postprocess(text: str) -> str:
return ''
def flores_postprocess(text: str) -> str:
text = text.strip().split('\n')[-1].strip()
return text
def flores_postprocess_chinese(text: str) -> str:
text = text.strip().split('\n')[-1].strip()
import jieba
truncated_text = text.strip().split('\n')[0]
cleaned_text = re.sub(r'\s+', ' ', truncated_text).strip()
cleaned_text = ' '.join(jieba.cut(cleaned_text))
return cleaned_text
def record_postprocess(text: str) -> str:
match = re.search(r'(?<=refers to )[^.]+', text)
......
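`flores_postprocess_chinese` reduces the model output to a single line, collapses whitespace, and re-segments the text with `jieba` so that token-level translation metrics can be computed on Chinese. A sketch of the effect (requires `jieba`; the sample sentence is invented, and the exact segmentation may vary):

```python
import re
import jieba

text = '今天天气很好，适合出门。'
line = text.strip().split('\n')[-1].strip()    # keep only the last line of the response
cleaned = re.sub(r'\s+', ' ', line).strip()    # collapse whitespace runs
segmented = ' '.join(jieba.cut(cleaned))       # insert spaces between segmented words
print(segmented)  # e.g. '今天 天气 很 好 ， 适合 出门 。'
```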
@@ -24,11 +24,18 @@ class EMEvaluator(BaseEvaluator):
for i in references]
cnt = 0
details = []
for pred, ans, origin_ans in zip(predictions, processed_answers,
references):
answers = list(set(ans + origin_ans))
detail = {'pred': pred, 'answer': answers}
if pred in ans or pred in origin_ans:
cnt += 1
detail['correct'] = True
else:
detail['correct'] = False
details.append(detail)
score = cnt / len(predictions) * 100
return {'score': score}
return {'score': score, 'details': details}
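The `MATHEvaluator`, `NQEvaluator`, `TriviaQAEvaluator` and `EMEvaluator` hunks above all follow the same pattern: in addition to the aggregate score, they now return a `details` list with one record per evaluated sample, which can be dumped for error analysis when `eval.runner.task.dump_details` is enabled (see the partitioner changes below). Roughly, each record looks like this (values invented; the gold field is `answer` in these evaluators and `answers` in `Gsm8kEvaluator`, holding either a string or a list of accepted answers):

```python
detail = {
    'pred': 'paris',                      # post-processed model prediction
    'answer': ['paris', 'paris france'],  # accepted gold answer(s)
    'correct': True,                      # whether the prediction matched
}
```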
@@ -51,8 +51,7 @@ class BaseInferencer:
self.output_json_filepath = output_json_filepath
self.output_json_filename = output_json_filename
self.is_main_process = is_main_process()
if not os.path.exists(self.output_json_filepath):
os.makedirs(self.output_json_filepath)
os.makedirs(self.output_json_filepath, exist_ok=True)
def inference(self,
retriever: BaseRetriever,
......
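In `BaseInferencer`, the exists-check followed by `os.makedirs` is collapsed into a single call with `exist_ok=True`, which also avoids a race when several workers create the same output directory. Minimal sketch (path is illustrative):

```python
import os

# Safe even if another process creates the directory between check and creation.
os.makedirs('outputs/default/predictions', exist_ok=True)
```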
@@ -94,6 +94,7 @@ class PPLInferencer(BaseInferencer):
index = 0
prompt_list = []
sub_ppl_list = []
token_num_list = []
normalizing_prompt_list = []
context_length_list = []
@@ -144,6 +145,7 @@ class PPLInferencer(BaseInferencer):
mode='ppl'))
normalizing_prompt_list.append(normalizing_prompt)
prompt_list.append(prompt)
token_num_list.append(prompt_token_num)
if normalizing_str is not None:
normalizing_str_len = self.model.get_token_len_from_template(
@@ -186,6 +188,10 @@ class PPLInferencer(BaseInferencer):
ice_str = self.model.parse_template(ice[idx], mode='ppl')
output_handler.save_prompt_and_ppl(
label, prompt.replace(ice_str, ''), prompt, res, index)
output_handler.results_dict[str(
index)][f'label: {str(label)}'][
'BPB'] = res * token_num_list[idx] / len(
prompt.replace(ice_str, '').encode())
index = index + 1
ppl.append(sub_ppl_list)
......
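The three `PPLInferencer` hunks above thread each prompt's token count through inference (`token_num_list`) so that a bytes-normalised loss, stored under 'BPB', can be recorded next to each per-label PPL: the per-token loss is scaled back up by the token count and divided by the UTF-8 byte length of the prompt with the in-context examples stripped. A sketch of that arithmetic, assuming `res` is the average per-token negative log-likelihood returned by the model's `get_ppl`:

```python
def bytes_normalised_loss(res: float, token_num: int, prompt_wo_ice: str) -> float:
    # res * token_num  -> total loss over the prompt's tokens
    # len(...encode()) -> UTF-8 byte length of the prompt without in-context examples
    return res * token_num / len(prompt_wo_ice.encode())

# Illustrative numbers only.
print(bytes_normalised_loss(res=2.1, token_num=120, prompt_wo_ice='问题：……答案：'))
```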
from abc import abstractmethod
from copy import deepcopy
from typing import Dict, List
from typing import Dict, List, Optional
from mmengine.config import ConfigDict
@@ -13,15 +13,23 @@ class BasePartitioner:
Args:
out_dir (str): The output directory of tasks.
keep_keys (List[str]): The keys to be kept from the experiment config
to the task config.
keep_keys (Optional[List[str]], optional): The keys to be kept from the
experiment config to the task config. Defaults to None. If None,
the following keys will be kept:
- eval.runner.task.judge_cfg
- eval.runner.task.dump_details
"""
def __init__(self,
out_dir: str,
keep_keys: List[str] = ['eval.runner.task.judge_cfg']):
def __init__(self, out_dir: str, keep_keys: Optional[List[str]] = None):
self.logger = get_logger()
self.out_dir = out_dir
if keep_keys is None:
self.keep_keys = [
'eval.runner.task.judge_cfg',
'eval.runner.task.dump_details',
]
else:
self.keep_keys = keep_keys
def __call__(self, cfg: ConfigDict) -> List[Dict]:
@@ -63,7 +71,8 @@ class BasePartitioner:
tgt_ptr = tgt_ptr[key]
tgt_ptr[key_chain[-1]] = ori_ptr[key_chain[-1]]
except Exception:
self.logger.warning(f'Key {k} not found in config, ignored.')
self.logger.debug(f'Key {k} not found in config, ignored.')
self.logger.debug(f'Additional config: {add_cfg}')
tasks = self.partition(models,
datasets,
......
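`BasePartitioner` now takes `keep_keys=None` and fills in the defaults itself (avoiding a shared mutable default argument), with `eval.runner.task.dump_details` added to the keys carried over from the experiment config into each task config; a missing key is now logged at debug rather than warning level. The copy itself walks the dotted path, as in the hunk above. A standalone sketch with plain dicts (the helper name is invented; the real code operates on mmengine ConfigDicts and catches missing keys):

```python
from typing import Dict


def copy_dotted_key(src: Dict, dst: Dict, dotted_key: str) -> None:
    """Copy e.g. 'eval.runner.task.dump_details' from an experiment config
    into a task config, creating intermediate mappings as needed."""
    keys = dotted_key.split('.')
    ori = src
    for k in keys[:-1]:
        ori = ori[k]  # a KeyError here is what the partitioner catches and logs
    tgt = dst
    for k in keys[:-1]:
        tgt = tgt.setdefault(k, {})
    tgt[keys[-1]] = ori[keys[-1]]


cfg = {'eval': {'runner': {'task': {'dump_details': True}}}}
task_cfg: Dict = {}
copy_dotted_key(cfg, task_cfg, 'eval.runner.task.dump_details')
print(task_cfg)  # {'eval': {'runner': {'task': {'dump_details': True}}}}
```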
import os.path as osp
from typing import Dict, List
from typing import Dict, List, Optional
from mmengine.config import Config, ConfigDict
@@ -11,15 +11,23 @@ from .base import BasePartitioner
@PARTITIONERS.register_module()
class NaivePartitioner(BasePartitioner):
"""Naive task partitioner. This partitioner will generate a task for each
model-dataset pair.
"""Naive task partitioner. This partitioner will generate a task for each n
model-dataset pairs.
Args:
out_dir (str): The output directory of tasks.
n (int): The number of model-dataset pairs in each task.
keep_keys (List[str]): The keys to be kept from the experiment config
to the task config.
"""
def __init__(self,
out_dir: str,
n: int = 1,
keep_keys: Optional[List[str]] = None):
super().__init__(out_dir=out_dir, keep_keys=keep_keys)
self.n = n
def partition(self,
models: List[ConfigDict],
datasets: List[ConfigDict],
@@ -53,13 +61,17 @@ class NaivePartitioner(BasePartitioner):
tasks = []
for model in models:
chunks = []
for dataset in datasets:
filename = get_infer_output_path(model, dataset, out_dir)
if osp.exists(filename):
continue
chunks.append(dataset)
for i in range(0, len(chunks), self.n):
task = Config({
'models': [model],
'datasets': [[dataset]],
'datasets': [chunks[i:i + self.n]],
'work_dir': work_dir,
**add_cfg
})
......
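`NaivePartitioner` gains an `n` argument: datasets whose prediction file already exists are filtered out first, and the remaining ones are grouped `n` at a time into a single task instead of one task per model-dataset pair. The grouping is plain slicing; a toy illustration (dataset names stand in for the real ConfigDicts):

```python
pending = ['siqa', 'hellaswag', 'gsm8k', 'triviaqa', 'nq']
n = 2
tasks = [pending[i:i + n] for i in range(0, len(pending), n)]
print(tasks)  # [['siqa', 'hellaswag'], ['gsm8k', 'triviaqa'], ['nq']]
```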
@@ -2,7 +2,7 @@ import copy
import math
import os.path as osp
from fnmatch import fnmatch
from typing import Dict, List, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union
import mmengine
from mmengine.config import Config, ConfigDict
@@ -24,6 +24,11 @@ class SizePartitioner(BasePartitioner):
max_task_size (int): The maximum size of a task.
gen_task_coef (int): The dataset cost measurement coefficient for
generation tasks.
strategy (str): The partition strategy. Supported strategies are:
'heuristic' and 'split'. Defaults to 'heuristic'.
heuristic: split large datasets into several tasks, merge small
datasets into one task.
split: split large datasets into several tasks only.
dataset_size_path (str): The path to the dataset size cache file.
keep_keys (list[str]): The keys to be kept from the experiment config
to the task config.
@@ -33,12 +38,17 @@ class SizePartitioner(BasePartitioner):
out_dir: str,
max_task_size: int = 40000,
gen_task_coef: int = 20,
strategy: str = 'heuristic',
dataset_size_path: str = '.cache/dataset_size.json',
keep_keys: List[str] = ['eval.runner.task.judge_cfg']):
keep_keys: Optional[List[str]] = None):
super().__init__(out_dir=out_dir, keep_keys=keep_keys)
self.max_task_size = max_task_size
self.gen_task_coef = gen_task_coef
self.dataset_size_path = dataset_size_path
assert strategy in ('heuristic', 'split'), \
f'Unsupported partition strategy: {strategy}. '\
'Supported strategies are: `heuristic`, `split` .'
self.strategy = strategy
def partition(self,
models: List[ConfigDict],
@@ -79,47 +89,47 @@ class SizePartitioner(BasePartitioner):
reverse=True)
tasks = []
for model in models:
task = Config({
'models': [model],
'datasets': [[]],
'work_dir': work_dir,
**add_cfg
})
num_data = 0
chunks = [] # elements: tuple(size, dataset_chunk)
for dataset in datasets:
filename = get_infer_output_path(model, dataset, out_dir)
root, ext = osp.splitext(filename)
# skip the task if the task output exists
if osp.exists(filename):
continue
dataset_size = self.get_cost(dataset)
if dataset_size > self.max_task_size:
root, ext = osp.splitext(filename)
dataset_splits = self.split_dataset(dataset)
for i, dataset_split in enumerate(dataset_splits):
# skip the task it the task output exists
if not osp.exists(f'{root}_{i}{ext}'):
chunks.append((self.max_task_size, dataset_split))
else:
chunks.append((dataset_size, dataset))
if self.strategy == 'heuristic':
chunks = sorted(chunks, key=lambda x: x[0], reverse=True)
current_size, current_chunks = 0, []
for index in range(len(chunks)):
current_size += chunks[index][0]
current_chunks.append(chunks[index][1])
if index == len(chunks) - 1 or current_size + chunks[
index + 1][0] > self.max_task_size:
tasks.append(
Config({
'models': [model],
'datasets': [[dataset_split]],
'datasets': [current_chunks],
'work_dir': work_dir,
**add_cfg
}))
else:
if num_data + dataset_size > self.max_task_size:
tasks.append(task)
task = Config({
current_size, current_chunks = 0, []
elif self.strategy == 'split':
for _, dataset in chunks:
tasks.append(
Config({
'models': [model],
'datasets': [[]],
'datasets': [[dataset]],
'work_dir': work_dir,
**add_cfg
})
num_data = 0
task['datasets'][0].append(dataset)
num_data = num_data + dataset_size
if task['datasets'][0]:
tasks.append(task)
}))
return tasks
@property
......
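`SizePartitioner` gains a `strategy` argument. Both strategies first turn the pending datasets into (cost, chunk) pairs, splitting any dataset whose estimated cost exceeds `max_task_size`; 'split' then emits one task per chunk, while the default 'heuristic' sorts the chunks by cost and greedily packs them into tasks that stay under `max_task_size`. A standalone sketch of the heuristic packing with invented sizes (real chunks hold dataset ConfigDicts, not names):

```python
from typing import List, Tuple


def pack_chunks(chunks: List[Tuple[int, str]], max_task_size: int) -> List[List[str]]:
    """Greedy packing mirroring the 'heuristic' branch above: largest chunks first,
    close the current task when the next chunk would push it past max_task_size."""
    chunks = sorted(chunks, key=lambda x: x[0], reverse=True)
    tasks: List[List[str]] = []
    current_size, current = 0, []
    for index, (size, name) in enumerate(chunks):
        current_size += size
        current.append(name)
        if index == len(chunks) - 1 or \
                current_size + chunks[index + 1][0] > max_task_size:
            tasks.append(current)
            current_size, current = 0, []
    return tasks


print(pack_chunks([(30000, 'mmlu_0'), (12000, 'gsm8k'), (9000, 'nq'), (4000, 'siqa')],
                  max_task_size=40000))
# [['mmlu_0'], ['gsm8k', 'nq', 'siqa']]
```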