Unverified Commit 32f40a8f authored by Fengzhe Zhou, committed by GitHub

[Sync] Sync with internal codes 2023.01.08 (#777)

parent 8194199d
@@ -5,6 +5,7 @@ from .icl_circular_evaluator import CircularEvaluator  # noqa
 from .icl_em_evaluator import EMEvaluator  # noqa
 from .icl_hf_evaluator import *  # noqa
 from .icl_jieba_rouge_evaluator import JiebaRougeEvaluator  # noqa
+from .icl_misc_evaluator import AverageMinKEvaluator  # noqa
 from .icl_misc_evaluator import AveragePPLEvaluator  # noqa
 from .icl_toxic_evaluator import ToxicEvaluator  # noqa
 from .lm_evaluator import LMEvaluator  # noqa
@@ -210,6 +210,20 @@ class BleuEvaluator(HuggingfaceEvaluator):
         super().__init__(metric='sacrebleu')
 
 
+@ICL_EVALUATORS.register_module()
+class BleuFloresEvaluator(HuggingfaceEvaluator):
+    """Bleu evaluator using flores200 tokenize."""
+
+    def __init__(self) -> None:
+        super().__init__(metric='sacrebleu')
+
+    def _preprocess(self, predictions: List, references: List) -> dict:
+        return {
+            'predictions': predictions,
+            'references': references,
+            'tokenize': 'flores200',
+        }
+
+
 @ICL_EVALUATORS.register_module()
 class MccEvaluator(AccEvaluator):
     """Matthews correlation evaluator."""
...
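BleuFloresEvaluator only overrides `_preprocess` so that the flores200 tokenizer is forwarded to the underlying sacrebleu metric. A minimal sketch of what that amounts to, assuming the HuggingFace `evaluate` sacrebleu metric and a sacrebleu version that ships the flores200 tokenizer (strings are made up):

    import evaluate

    sacrebleu = evaluate.load('sacrebleu')
    result = sacrebleu.compute(
        predictions=['The cat sits on the mat.'],
        references=[['The cat sat on the mat.']],
        tokenize='flores200',  # the extra argument BleuFloresEvaluator injects
    )
    print(result['score'])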
@@ -9,3 +9,11 @@ class AveragePPLEvaluator(BaseEvaluator):
     def score(self, ppl):
         average_ppl = sum(ppl) / len(ppl)
         return {'average_ppl': average_ppl}
+
+
+@ICL_EVALUATORS.register_module()
+class AverageMinKEvaluator(BaseEvaluator):
+
+    def score(self, mink):
+        average_mink = sum(mink) / len(mink)
+        return {'average_mink': average_mink}
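Both evaluators simply average the per-sample values their inferencer returns, so a quick sanity check looks like this (numbers are made up):

    from opencompass.openicl.icl_evaluator import AverageMinKEvaluator

    evaluator = AverageMinKEvaluator()
    print(evaluator.score(mink=[-6.2, -4.8, -5.5]))  # {'average_mink': -5.5}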
@@ -4,7 +4,8 @@ from .icl_base_inferencer import BaseInferencer  # noqa
 from .icl_chat_inferencer import ChatInferencer  # noqa
 from .icl_clp_inferencer import CLPInferencer  # noqa
 from .icl_gen_inferencer import GenInferencer  # noqa
-from .icl_loglikelihood_inferencer import LoglikelihoodInferencer  # noqa
+from .icl_ll_inferencer import LLInferencer  # noqa
+from .icl_mink_percent_inferencer import MinKPercentInferencer  # noqa
 from .icl_ppl_inferencer import PPLInferencer  # noqa
 from .icl_ppl_only_inferencer import PPLOnlyInferencer  # noqa
 from .icl_sc_inferencer import SCInferencer  # noqa
...
@@ -18,7 +18,7 @@ logger = get_logger(__name__)
 
 @ICL_INFERENCERS.register_module()
-class LoglikelihoodInferencer(BaseInferencer):
+class LLInferencer(BaseInferencer):
     """Loglikelihood Inferencer class to evaluate by loglikelihood.
 
     Attributes:
@@ -60,7 +60,7 @@ class LLInferencer(BaseInferencer):
                   output_json_filepath: Optional[str] = None,
                   output_json_filename: Optional[str] = None) -> List:
         # 1. Preparation for output logs
-        output_handler = LoglikelihoodInferencerOutputHandler()
+        output_handler = LLInferencerOutputHandler()
 
         sub_predictions = []
         ppl = []
@@ -126,8 +126,10 @@ class LLInferencer(BaseInferencer):
                 token_num_list.append(prompt_token_num)
                 cont_list.append(retriever.test_ds[idx]['cont'])
 
-            # 5.2 Get PPL
-            logger.info(f"Calculating PPL for prompts labeled '{label}'")
+            # 5.2 Get loglikelihood
+            logger.info(
+                f"Calculating Loglikelihood for prompts labeled '{label}'"
+            )  # noqa
             for idx in trange(0,
                               len(prompt_list),
                               self.batch_size,
@@ -137,8 +139,10 @@ class LLInferencer(BaseInferencer):
                 with torch.no_grad():
                     # mainly modify compared to PPLInferencer
-                    sub_res = self.model.get_loglikelihood_from_template(
-                        sub_prompt_list, sub_cont_list).tolist()
+                    sub_inputs = self.model.parse_template(sub_prompt_list,
+                                                           mode='ppl')
+                    sub_res = self.model.get_loglikelihood(
+                        sub_inputs, sub_cont_list).tolist()
                 for res, prompt in zip(
                         sub_res,
                         self.model.parse_template(sub_prompt_list,
@@ -174,7 +178,7 @@ class LLInferencer(BaseInferencer):
     ]
 
 
-class LoglikelihoodInferencerOutputHandler:
+class LLInferencerOutputHandler:
     results_dict = {}
 
     def __init__(self) -> None:
...
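The only behavioural change to LLInferencer, besides the rename, is that the one-shot `get_loglikelihood_from_template` helper is split into an explicit template-parsing step followed by a plain loglikelihood call. A sketch of the equivalent call pattern, with `model`, `sub_prompt_list` and `sub_cont_list` standing in for the objects used in the loop above:

    # before: one helper did both steps
    # sub_res = model.get_loglikelihood_from_template(sub_prompt_list,
    #                                                 sub_cont_list).tolist()

    # after: parse the templates once, then score the parsed inputs
    sub_inputs = model.parse_template(sub_prompt_list, mode='ppl')
    sub_res = model.get_loglikelihood(sub_inputs, sub_cont_list).tolist()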
"""PPL Inferencer."""
import os
from typing import List, Optional
import mmengine
import torch
from tqdm import tqdm
from opencompass.models.base import BaseModel
from opencompass.registry import ICL_INFERENCERS
from ..icl_prompt_template import PromptTemplate
from ..icl_retriever import BaseRetriever
from ..utils import get_logger
from .icl_base_inferencer import BaseInferencer, dump_results_dict
logger = get_logger(__name__)
@ICL_INFERENCERS.register_module()
class MinKPercentInferencer(BaseInferencer):
"""PPLOnlyInferencer class to calculate PPL and PPL only, no choice is
made. This Inferencer is usually used along with AveragePPLEvaluator.
Attributes:
model (:obj:`BaseModel`, optional): The module to inference.
max_seq_len (:obj:`int`): Maximum number of tokenized words allowed by
the LM.
batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`
output_json_filepath (:obj:`str`, optional): File path for output
`JSON` file.
output_json_filename (:obj:`str`, optional): File name for output
`JSON` file.
save_every (:obj:`int`, optional): Save intermediate results every
"""
def __init__(
self,
model: BaseModel,
max_seq_len: Optional[int] = None,
batch_size: Optional[int] = 1,
output_json_filepath: Optional[str] = './icl_inference_output',
output_json_filename: Optional[str] = 'predictions',
save_every: Optional[int] = 1,
**kwargs) -> None:
super().__init__(
model=model,
max_seq_len=max_seq_len,
batch_size=batch_size,
output_json_filename=output_json_filename,
output_json_filepath=output_json_filepath,
**kwargs,
)
self.save_every = save_every
def inference(self,
retriever: BaseRetriever,
ice_template: Optional[PromptTemplate] = None,
prompt_template: Optional[PromptTemplate] = None,
output_json_filepath: Optional[str] = None,
output_json_filename: Optional[str] = None) -> List:
# 1. Preparation for output logs
output_handler = PPLOnlyInferencerOutputHandler()
if output_json_filepath is None:
output_json_filepath = self.output_json_filepath
if output_json_filename is None:
output_json_filename = self.output_json_filename
# 2. Get results of retrieval process
ice_idx_list = retriever.retrieve()
# 3. Generate prompts for testing input
prompt_list = self.get_generation_prompt_list_from_retriever_indices(
ice_idx_list,
retriever,
max_seq_len=self.max_seq_len,
ice_template=ice_template,
prompt_template=prompt_template)
# 3.1 Fetch and zip prompt & gold answer if output column exists
ds_reader = retriever.dataset_reader
assert ds_reader.output_column is None, (
'PPLOnlyInferencer supports `output_column=None` only.')
# Create tmp json file for saving intermediate results and future
# resuming
index = 0
tmp_json_filepath = os.path.join(output_json_filepath,
'tmp_' + output_json_filename)
if os.path.exists(tmp_json_filepath):
# TODO: move resume to output handler
try:
tmp_result_dict = mmengine.load(tmp_json_filepath)
except Exception:
pass
else:
output_handler.results_dict = tmp_result_dict
index = len(tmp_result_dict)
# 4. Wrap prompts with Dataloader
dataloader = self.get_dataloader(prompt_list[index:], self.batch_size)
# 5. Inference for prompts in each batch
logger.info('Starting inference process...')
for datum in tqdm(dataloader, disable=not self.is_main_process):
entry = datum
# 5-1. Inference with local model
with torch.no_grad():
sub_inputs = self.model.parse_template(entry, mode='ppl')
minks = self.model.get_mink_percent(sub_inputs).tolist()
parsed_entries = self.model.parse_template(entry, mode='gen')
# 5-3. Save current output
for prompt, mink, in zip(parsed_entries, minks):
output_handler.save_results(prompt, mink, index)
index = index + 1
# 5-4. Save intermediate results
if (self.save_every is not None and index % self.save_every == 0
and self.is_main_process):
output_handler.write_to_json(output_json_filepath,
'tmp_' + output_json_filename)
# 6. Output
if self.is_main_process:
os.makedirs(output_json_filepath, exist_ok=True)
output_handler.write_to_json(output_json_filepath,
output_json_filename)
if os.path.exists(tmp_json_filepath):
os.remove(tmp_json_filepath)
return [
sample['mink'] for sample in output_handler.results_dict.values()
]
def get_generation_prompt_list_from_retriever_indices(
self,
ice_idx_list: List[List[int]],
retriever: BaseRetriever,
max_seq_len: Optional[int] = None,
ice_template: Optional[PromptTemplate] = None,
prompt_template: Optional[PromptTemplate] = None):
prompt_list = []
for idx, ice_idx in enumerate(ice_idx_list):
ice = retriever.generate_ice(ice_idx, ice_template=ice_template)
prompt = retriever.generate_prompt_for_generate_task(
idx,
ice,
ice_template=ice_template,
prompt_template=prompt_template)
if max_seq_len is not None:
prompt_token_num = self.model.get_token_len_from_template(
prompt, mode='gen')
while len(ice_idx) > 0 and prompt_token_num > max_seq_len:
ice_idx = ice_idx[:-1]
ice = retriever.generate_ice(ice_idx,
ice_template=ice_template)
prompt = retriever.generate_prompt_for_generate_task(
idx,
ice,
ice_template=ice_template,
prompt_template=prompt_template)
prompt_token_num = self.model.get_token_len_from_template(
prompt, mode='gen')
prompt_list.append(prompt)
return prompt_list
class PPLOnlyInferencerOutputHandler:
origin_prompt_dict = {}
output_dict = {}
results_dict = {}
def __init__(self) -> None:
self.results_dict = {}
def write_to_json(self, save_dir: str, filename: str):
"""Dump the result to a json file."""
dump_results_dict(self.results_dict, os.path.join(save_dir, filename))
def save_results(self, origin_prompt, mink, idx):
self.results_dict[str(idx)] = {
'origin_prompt': origin_prompt,
'mink': mink,
}
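MinKPercentInferencer only collects whatever `model.get_mink_percent` returns for each prompt; the model-side computation is not part of this diff. For orientation, a rough sketch of a Min-K% Prob style score (mean log-probability of the k% least likely tokens, as used for pretraining-data detection); the function name and the 20% default are illustrative, not the actual model API:

    import numpy as np

    def mink_percent_score(token_logprobs, k=0.2):
        """Mean log-prob of the k% lowest-probability tokens (illustrative)."""
        logprobs = np.sort(np.asarray(token_logprobs, dtype=float))
        n = max(int(len(logprobs) * k), 1)
        return float(logprobs[:n].mean())

    print(mink_percent_score([-0.1, -0.3, -2.5, -7.0]))  # -7.0 (only the lowest token survives k=0.2)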
 from .mm_naive import *  # noqa: F401, F403
 from .naive import *  # noqa: F401, F403
+from .num_worker import *  # noqa: F401, F403
 from .size import *  # noqa: F401, F403
import copy
import math
import os.path as osp
from typing import Dict, List, Optional

import mmengine
from mmengine.config import Config, ConfigDict

from opencompass.registry import PARTITIONERS
from opencompass.utils import (build_dataset_from_cfg, dataset_abbr_from_cfg,
                               get_infer_output_path)

from .base import BasePartitioner


@PARTITIONERS.register_module()
class NumWorkerPartitioner(BasePartitioner):
    """Task partitioner based on the pre-defined number of workers.

    Args:
        out_dir (str): The output directory of tasks.
        num_worker (int): The number of workers. default: 8.
        min_task_size (int): The minimum size of a task. default: 16.
        dataset_size_path (str): The path to the dataset size cache file.
        keep_keys (list[str]): The keys to be kept from the experiment config
            to the task config.
    """

    def __init__(self,
                 out_dir: str,
                 num_worker: int = 8,
                 min_task_size: int = 16,
                 dataset_size_path: str = '.cache/dataset_size.json',
                 keep_keys: Optional[List[str]] = None):
        super().__init__(out_dir=out_dir, keep_keys=keep_keys)
        self.num_worker = num_worker
        self.min_task_size = min_task_size
        self.dataset_size_path = dataset_size_path

    def partition(self,
                  model_dataset_combinations: List[Dict[str, List]],
                  work_dir: str,
                  out_dir: str,
                  add_cfg: Dict = {}) -> List[ConfigDict]:
        # intentionally avoid any sort here, to preserve the user's ability
        # to manipulate the order
        tasks = []
        for comb in model_dataset_combinations:
            for model in comb['models']:
                chunks = []
                for dataset in comb['datasets']:
                    filename = get_infer_output_path(model, dataset, out_dir)
                    # skip the task if the task output exists
                    if osp.exists(filename):
                        continue
                    dataset_size = self.get_size(dataset)
                    if dataset_size > self.min_task_size:
                        root, ext = osp.splitext(filename)
                        dataset_splits = self.split_dataset(dataset)
                        for i, dataset_split in enumerate(dataset_splits):
                            if not osp.exists(f'{root}_{i}{ext}'):
                                chunks.append(dataset_split)
                    else:
                        chunks.append(dataset)

                buckets = [[] for _ in range(self.num_worker)]
                for i, chunk in enumerate(chunks):
                    buckets[i % self.num_worker].append(chunk)

                for bucket in buckets:
                    if len(bucket) > 0:
                        tasks.append(
                            Config({
                                'models': [model],
                                'datasets': [bucket],
                                'work_dir': work_dir,
                                **add_cfg
                            }))
        return tasks

    @property
    def dataset_size(self):
        if not hasattr(self, '_dataset_size'):
            if osp.exists(self.dataset_size_path):
                self._dataset_size = mmengine.load(self.dataset_size_path)
            else:
                self._dataset_size = {}
        return self._dataset_size

    def split_dataset(self, dataset_cfg: ConfigDict) -> List[ConfigDict]:
        """Split dataset into several parts."""
        dataset_size = self.get_size(dataset_cfg)
        split_configs = []
        abbr = dataset_abbr_from_cfg(dataset_cfg)
        # evenly distribute the task
        num_split = self.num_worker
        step = max(math.ceil(dataset_size / num_split), self.min_task_size)
        for part, i in enumerate(range(0, dataset_size, step)):
            cfg = copy.deepcopy(dataset_cfg)
            cfg['abbr'] = abbr + f'_{part}'
            test_range = cfg['reader_cfg'].get('test_range', '')
            cfg['reader_cfg']['test_range'] = f'{test_range}[{i}:{i+step}]'
            split_configs.append(cfg)
        return split_configs

    def get_size(self, dataset: ConfigDict) -> int:
        dataset_abbr = dataset_abbr_from_cfg(dataset)

        test_range = dataset.reader_cfg.get('test_range', '')

        if dataset_abbr in self.dataset_size:
            actual_size = eval('len(range(self.dataset_size[dataset_abbr])'
                               f'{test_range})')
            return actual_size

        dataset = build_dataset_from_cfg(dataset)
        self.dataset_size[dataset_abbr] = len(dataset.test)

        mmengine.mkdir_or_exist('.cache/')
        mmengine.dump(self.dataset_size,
                      self.dataset_size_path,
                      indent=4,
                      ensure_ascii=False)

        actual_size = eval('len(range(self.dataset_size[dataset_abbr])'
                           f'{test_range})')
        return actual_size
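A worked example of how `split_dataset` slices a dataset (sizes are hypothetical): with `num_worker=8` and `min_task_size=16`, a 100-sample dataset is sliced with step `max(ceil(100 / 8), 16) = 16`, and the resulting chunks are then dealt round-robin into at most `num_worker` buckets, one task per non-empty bucket.

    import math

    dataset_size, num_worker, min_task_size = 100, 8, 16  # hypothetical sizes
    step = max(math.ceil(dataset_size / num_worker), min_task_size)
    print([f'[{i}:{i + step}]' for i in range(0, dataset_size, step)])
    # ['[0:16]', '[16:32]', '[32:48]', '[48:64]', '[64:80]', '[80:96]', '[96:112]']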
@@ -110,7 +110,7 @@ class SlurmRunner(BaseRunner):
             tmpl += f' --gres=gpu:{num_gpus}'
         for extra_cmd in self.extra_command:
             tmpl += f' {extra_cmd}'
-        tmpl += f" -N1 -J '{task_name[:512]}'" + ' {task_cmd}'
+        tmpl += f" -N1 -u -J '{task_name[:512]}'" + ' {task_cmd}'
         get_cmd = partial(task.get_command,
                           cfg_path=param_file,
                           template=tmpl)
...
@@ -140,17 +140,23 @@ class SlurmSequentialRunner(BaseRunner):
         tbar = tqdm(total=len(job_ids), desc='clear sruns')
         for batched_job_ids in batched(job_ids, 4):
-            ps = []
-            for job_id in batched_job_ids:
-                tbar.update()
-                if job_id is None:
-                    continue
-                cmd = f'scancel {job_id}'
-                p = subprocess.Popen(cmd,
-                                     shell=True,
-                                     stdout=subprocess.PIPE,
-                                     stderr=subprocess.STDOUT)
-                ps.append(p)
+            while True:
+                ps = []
+                try:
+                    for job_id in batched_job_ids:
+                        tbar.update()
+                        if job_id is None:
+                            continue
+                        cmd = f'scancel {job_id}'
+                        p = subprocess.Popen(cmd,
+                                             shell=True,
+                                             stdout=subprocess.PIPE,
+                                             stderr=subprocess.STDOUT)
+                        ps.append(p)
+                    break
+                except KeyboardInterrupt:
+                    logger = get_logger()
+                    logger.error('Ignoring KeyboardInterrupt...')
             for p in ps:
                 p.wait()
         tbar.close()
@@ -182,7 +188,7 @@ class SlurmSequentialRunner(BaseRunner):
             tmpl += f' --gres=gpu:{num_gpus}'
         for extra_cmd in self.extra_command:
             tmpl += f' {extra_cmd}'
-        tmpl += f" -N1 -J '{task_name[:512]}'" + ' {task_cmd}'
+        tmpl += f" -N1 -u -J '{task_name[:512]}'" + ' {task_cmd}'
         get_cmd = partial(task.get_command,
                           cfg_path=param_file,
                           template=tmpl)
...
@@ -127,6 +127,8 @@ class DefaultSummarizer:
                 dataset_eval_mode[dataset_abbr] = 'gen'
             elif 'PPLInferencer' in inferencer:
                 dataset_eval_mode[dataset_abbr] = 'ppl'
+            elif 'LLInferencer' in inferencer:
+                dataset_eval_mode[dataset_abbr] = 'll'
             else:
                 dataset_eval_mode[dataset_abbr] = 'unknown'
                 self.logger.warning(f'unknown inferencer: {inferencer} - {dataset_abbr}')
...
@@ -164,8 +164,11 @@ class PretrainSummarizer:
         time = now.strftime('%m/%d %H:%M')
         times = [time] * len(model_abbrs)
         table.append(header)
-        table.append(['dataset', 'version', 'metric', 'mode'] + times)
-        table.append(['dataset', 'version', 'metric', 'mode'] + checkpoints)
+        table.append(['time', 'version', 'metric', 'mode'] + times)
+        table.append(['checkpoint', 'version', 'metric', 'mode'] + checkpoints)
+        # check long bench
+        max_seq_lens = [str(model_cfg.max_seq_len) for model_cfg in model_cfgs]
+        table.append(['max_seq_len', 'version', 'metric', 'mode'] + max_seq_lens)
         dataset_score = [0] * len(model_abbrs)
         dataset_num = [0] * len(model_abbrs)
@@ -187,11 +190,9 @@ class PretrainSummarizer:
             row = [dataset_abbr, prompt_version.get(dataset_abbr, '-'), metric, dataset_eval_mode.get(dataset_abbr, '-')]
             for i, model_abbr in enumerate(model_abbrs):
                 if dataset_abbr in parsed_results[model_abbr]:
-                    if index == 0:
-                        row.append('{:.02f}'.format(parsed_results[model_abbr][dataset_abbr][index]))
-                        dataset_score[i] += parsed_results[model_abbr][dataset_abbr][index]
-                        dataset_num[i] += 1
-                    # row.append('{:.02f}'.format(parsed_results[model_abbr][dataset_abbr][index]))
+                    row.append('{:.02f}'.format(parsed_results[model_abbr][dataset_abbr][index]))
+                    dataset_score[i] += parsed_results[model_abbr][dataset_abbr][index]
+                    dataset_num[i] += 1
                 else:
                     if dataset_abbr.startswith('---') and dataset_num[i] != 0:
                         row.append('{:.02f}'.format(dataset_score[i] / dataset_num[i]))
...
@@ -216,8 +216,8 @@ class OpenICLEvalTask(BaseTask):
         result = icl_evaluator.score(**preds)
 
         if self.dump_details:
+            details = result.get('details', None)
             try:
-                details = result.pop('details', None)
                 result['details'] = self.format_details(
                     pred_strs, test_set[self.output_column], details,
                     pred_dicts)
@@ -225,13 +225,10 @@ class OpenICLEvalTask(BaseTask):
                 if 'PPL' in str(
                         self.dataset_cfg.infer_cfg.inferencer.type):
-                    result['correct_bpb'], result[
-                        'incorrect_bpb'] = self.calculate_bpb(pred_dicts)
-                else:
-                    result['incorrect_bpb'] = result['correct_bpb'] = -1
+                    result['correct_bpb'], result['incorrect_bpb'] = \
+                        self.calculate_bpb(pred_dicts)
             except Exception as e:
                 self.logger.warning(f'Skip dumping details due to: {e}.')
+                result['incorrect_bpb'] = result['correct_bpb'] = -1
         else:
             result.pop('details', None)
...
@@ -43,7 +43,9 @@ class OpenICLInferTask(BaseTask):
            the command.
         """
         script_path = __file__
-        if self.num_gpus > 0:
+        has_vllm = ('VLLM' in str(self.model_cfgs[0].get('type', ''))) or \
+            'VLLM' in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
+        if self.num_gpus > 0 and not has_vllm:
             port = random.randint(12000, 32000)
             command = (f'torchrun --master_port={port} '
                        f'--nproc_per_node {self.num_procs} '
...
@@ -57,7 +57,7 @@ def last_capital_postprocess(text: str) -> str:
     return ''
 
 
-def first_option_postprocess(text: str, options: str) -> str:
+def first_option_postprocess(text: str, options: str, cushion=True) -> str:
     """Find first valid option for text."""
 
     # yapf: disable
@@ -91,26 +91,31 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
         f'[是为。]\s?([{options}])[。\.]?$',
         f'因此\s?([{options}])[。\.]?$',
         f'显然\s?([{options}])[。\.]?$',
-        f'1.\s?(.*?)$',
         f'答案是\s?(\S+)(?:。|$)',
         f'答案应该是\s?(\S+)(?:。|$)',
         f'答案为\s?(\S+)(?:。|$)',
-        f'(\s|^)[{options}][\s。,,::\.$]',
         f'[Tt]he answer is ([{options}])',
         f'[Tt]he answer is option ([{options}])',
        f'[Tt]he correct answer is ([{options}])',
         f'[Tt]he correct answer is option ([{options}])',
         f'[Tt]he answer to the question is ([{options}])',
+        f'^选项\s?([{options}])',
+        f'^([{options}])\s?选?项',
+        f'(\s|^)[{options}][\s。,,::\.$]',
+        f'(\s|^)[{options}](\s|$)',
+        f'1.\s?(.*?)$',
+    ]
+    cushion_patterns = [
         f'([{options}]):',
-        f'(^|\s)[{options}](\s|$)',
         f'[{options}]',
     ]
     # flake8: noqa
     # yapf: enable
 
-    regexes = [re.compile(pattern) for pattern in patterns]
-    for regex in regexes:
-        match = regex.search(text)
+    if cushion:
+        patterns.extend(cushion_patterns)
+    for pattern in patterns:
+        match = re.search(pattern, text)
         if match:
             outputs = match.group(0)
             for i in options:
...
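The new `cushion` flag decides whether the two loose fallback patterns (an `X:` fragment or a bare option letter anywhere in the text) are appended after the strict patterns. A usage sketch with made-up model outputs, assuming the function keeps its usual home in `opencompass.utils.text_postprocessors`:

    from opencompass.utils.text_postprocessors import first_option_postprocess

    print(first_option_postprocess('The answer is B, because ...', 'ABCD'))  # 'B'
    # Only the loose fallback f'[{options}]' catches the next one, so it is
    # found with the default cushion=True but not with cushion=False:
    print(first_option_postprocess('Probably (C), I think.', 'ABCD'))  # 'C'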
+antlr4-python3-runtime==4.11
+git+ssh://git@gitlab.pjlab.org.cn:1122/openmmlab/bigmodel/ilagent.git@czh/eval_gen
+ipykernel
+ipython
 json5
 jupyter
 jupyter_client
 jupytext
 lagent
+networkx
 scikit-image
-sympy
+sympy==1.12
@@ -7,6 +7,7 @@ datasets>=2.12.0
 einops==0.5.0
 evaluate>=0.3.0
 fairscale
+func_timeout
 fuzzywuzzy
 jieba
 ltp
...
@@ -30,6 +30,14 @@ def get_prompt_hash(dataset_cfg: Union[ConfigDict, List[ConfigDict]]) -> str:
         hashes = ','.join([get_prompt_hash(cfg) for cfg in dataset_cfg])
         hash_object = hashlib.sha256(hashes.encode())
         return hash_object.hexdigest()
+    # for custom datasets
+    if 'infer_cfg' not in dataset_cfg:
+        dataset_cfg.pop('abbr', '')
+        dataset_cfg.pop('path', '')
+        d_json = json.dumps(dataset_cfg.to_dict(), sort_keys=True)
+        hash_object = hashlib.sha256(d_json.encode())
+        return hash_object.hexdigest()
+    # for regular datasets
     if 'reader_cfg' in dataset_cfg.infer_cfg:
         # new config
         reader_cfg = dict(type='DatasetReader',
@@ -67,7 +75,7 @@ def get_hash(path):
 def check_and_rename(filepath):
     base_name = os.path.basename(filepath)
-    match = re.match(r'(.*)_(gen|ppl)_(.*).py', base_name)
+    match = re.match(r'(.*)_(gen|ppl|ll)_(.*).py', base_name)
     if match:
         dataset, mode, old_hash = match.groups()
         new_hash = get_hash(filepath)
@@ -119,6 +127,7 @@ def main():
         return
     with Pool(16) as p:
         p.starmap(os.rename, name_pairs)
+    root_folder = 'configs'
     python_files = glob.glob(f'{root_folder}/**/*.py', recursive=True)
     update_data = [(python_file, name_pairs) for python_file in python_files]
     with Pool(16) as p:
...
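With the extra `ll` alternative, loglikelihood config files are now picked up by `check_and_rename` as well. A quick check of the pattern (filename and hash are made up):

    import re

    match = re.match(r'(.*)_(gen|ppl|ll)_(.*).py', 'winogrande_ll_c5cf57.py')
    print(match.groups())  # ('winogrande', 'll', 'c5cf57')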