Commit 7d346000 authored by gaotongxiao

initial commit
import argparse
import getpass
import os
import os.path as osp
from datetime import datetime
from mmengine.config import Config
from opencompass.partitioners import NaivePartitioner, SizePartitioner
from opencompass.runners import DLCRunner, LocalRunner, SlurmRunner
from opencompass.utils import LarkReporter, Summarizer, get_logger
def parse_args():
parser = argparse.ArgumentParser(description='Run an evaluation task')
    parser.add_argument('config', help='Config file path')
# add mutually exclusive args `--slurm` `--dlc`, default to local runner
    launch_method = parser.add_mutually_exclusive_group()
    launch_method.add_argument('--slurm',
                               action='store_true',
                               default=False,
                               help='Whether to use srun to launch tasks, if '
                               'True, `--partition(-p)` must be set. Defaults'
                               ' to False')
    launch_method.add_argument('--dlc',
                               action='store_true',
                               default=False,
                               help='Whether to use dlc to launch tasks, if '
                               'True, `--aliyun-cfg` must be set. Defaults'
                               ' to False')
# add general args
parser.add_argument('--debug',
                        help='Debug mode, in which the scheduler will run '
                        'tasks in a single process, and output will not be '
                        'redirected to files',
action='store_true',
default=False)
parser.add_argument('-m',
'--mode',
help='Running mode. You can choose "infer" if you '
'only want the inference results, or "eval" if you '
'already have the results and want to evaluate them, '
'or "viz" if you want to visualize the results.',
choices=['all', 'infer', 'eval', 'viz'],
default='all',
type=str)
parser.add_argument('-r',
'--reuse',
nargs='?',
type=str,
const='latest',
                        help='Reuse previous outputs & results, and run any '
                        'missing jobs present in the config. If its '
                        'argument is not specified, the latest results in '
                        'the work_dir will be reused. The argument can '
                        'also be a specific timestamp, e.g. 20230516_144254')
parser.add_argument('-w',
'--work-dir',
                        help='Work path, all the outputs will be saved in '
                        'this path, including the slurm logs, the evaluation'
                        ' results, the summary results, etc. If not specified,'
                        ' the work_dir will be set to "./outputs/default".',
default=None,
type=str)
parser.add_argument('-l',
'--lark',
help='Report the running status to lark bot',
action='store_true',
default=False)
parser.add_argument('--max-partition-size',
help='The maximum size of a task.',
type=int,
                        default=2000)
parser.add_argument(
'--gen-task-coef',
help='The dataset cost measurement coefficient for generation tasks',
type=int,
default=20)
parser.add_argument('--max-num-workers',
help='Max number of workers to run in parallel.',
type=int,
default=32)
parser.add_argument(
'--retry',
        help='Number of retries if the job fails when using slurm or dlc.',
type=int,
default=2)
# set srun args
slurm_parser = parser.add_argument_group('slurm_args')
parse_slurm_args(slurm_parser)
# set dlc args
dlc_parser = parser.add_argument_group('dlc_args')
parse_dlc_args(dlc_parser)
args = parser.parse_args()
if args.slurm:
assert args.partition is not None, (
'--partition(-p) must be set if you want to use slurm')
if args.dlc:
assert os.path.exists(args.aliyun_cfg), (
            'When launching tasks using dlc, it needs to be configured '
'in "~/.aliyun.cfg", or use "--aliyun-cfg $ALiYun-CFG_Path"'
' to specify a new path.')
return args
def parse_slurm_args(slurm_parser):
"""these args are all for slurm launch."""
slurm_parser.add_argument('-p',
'--partition',
help='Slurm partition name',
default=None,
type=str)
slurm_parser.add_argument('-q',
'--quotatype',
help='Slurm quota type',
default='auto',
type=str)
def parse_dlc_args(dlc_parser):
"""these args are all for dlc launch."""
dlc_parser.add_argument('--aliyun-cfg',
help='The config path for aliyun config',
default='~/.aliyun.cfg',
type=str)
def main():
args = parse_args()
# initialize logger
logger = get_logger(log_level='DEBUG' if args.debug else 'INFO')
cfg = Config.fromfile(args.config)
if args.work_dir is not None:
cfg['work_dir'] = args.work_dir
else:
cfg.setdefault('work_dir', './outputs/default/')
# cfg_time_str defaults to the current time
cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S')
if args.reuse:
if args.reuse == 'latest':
dirs = os.listdir(cfg.work_dir)
assert len(dirs) > 0, 'No previous results to reuse!'
dir_time_str = sorted(dirs)[-1]
else:
dir_time_str = args.reuse
        logger.info(f'Reusing experiments from {dir_time_str}')
elif args.mode in ['eval', 'viz']:
raise ValueError('You must specify -r or --reuse when running in eval '
'or viz mode!')
# update "actual" work_dir
cfg['work_dir'] = osp.join(cfg.work_dir, dir_time_str)
os.makedirs(osp.join(cfg.work_dir, 'configs'), exist_ok=True)
# dump config
output_config_path = osp.join(cfg.work_dir, 'configs',
f'{cfg_time_str}.py')
cfg.dump(output_config_path)
    # Config is intentionally reloaded here to avoid the issue that
    # initialized types cannot be serialized
cfg = Config.fromfile(output_config_path)
    # report to lark bot if --lark is specified
if not args.lark:
cfg['lark_bot_url'] = None
elif cfg.get('lark_bot_url', None):
content = f'{getpass.getuser()}\'s task has been launched!'
LarkReporter(cfg['lark_bot_url']).post(content)
if args.mode in ['all', 'infer']:
# Use SizePartitioner to split into subtasks
partitioner = SizePartitioner(osp.join(cfg['work_dir'],
'predictions/'),
max_task_size=args.max_partition_size,
gen_task_coef=args.gen_task_coef)
tasks = partitioner(cfg)
# execute the infer subtasks
exec_infer_runner(tasks, args, cfg)
# evaluate
if args.mode in ['all', 'eval']:
        # Use NaivePartitioner, which does not split tasks
partitioner = NaivePartitioner(osp.join(cfg['work_dir'], 'results/'))
tasks = partitioner(cfg)
# execute the eval tasks
exec_eval_runner(tasks, args, cfg)
# visualize
if args.mode in ['all', 'eval', 'viz']:
summarizer = Summarizer(cfg)
summarizer.summarize(time_str=cfg_time_str)
def exec_infer_runner(tasks, args, cfg):
"""execute infer runner according to args."""
if args.slurm:
runner = SlurmRunner(dict(type='OpenICLInferTask'),
max_num_workers=args.max_num_workers,
partition=args.partition,
quotatype=args.quotatype,
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
elif args.dlc:
runner = DLCRunner(dict(type='OpenICLInferTask'),
max_num_workers=args.max_num_workers,
aliyun_cfg=Config.fromfile(args.aliyun_cfg),
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
else:
runner = LocalRunner(
task=dict(type='OpenICLInferTask'),
# max_num_workers = args.max_num_workers,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
runner(tasks)
def exec_eval_runner(tasks, args, cfg):
"""execute infer runner according to args."""
if args.slurm:
runner = SlurmRunner(dict(type='OpenICLEvalTask'),
max_num_workers=args.max_num_workers,
partition=args.partition,
quotatype=args.quotatype,
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
elif args.dlc:
runner = DLCRunner(dict(type='OpenICLEvalTask'),
max_num_workers=args.max_num_workers,
aliyun_cfg=Config.fromfile(args.aliyun_cfg),
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
else:
runner = LocalRunner(
task=dict(type='OpenICLEvalTask'),
# max_num_workers = args.max_num_workers,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
runner(tasks)
if __name__ == '__main__':
main()
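# A minimal usage sketch for this launcher; the script name `run.py` and the
# config path below are illustrative assumptions, not part of this commit:
#
#   python run.py configs/eval_demo.py                          # local runner (default)
#   python run.py configs/eval_demo.py --slurm -p my_partition  # srun-based launch
#   python run.py configs/eval_demo.py --dlc --aliyun-cfg ~/.aliyun.cfg
#   python run.py configs/eval_demo.py -r latest -m eval        # reuse latest outputs, eval only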
import unittest
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.utils.prompt import PromptList
class TestPromptTemplate(unittest.TestCase):
def setUp(self) -> None:
self.qa_template = dict(begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='instruct'),
'</E>',
],
round=[
dict(role='HUMAN', prompt='</input>'),
dict(role='BOT',
prompt='Answer: </answer>')
])
self.multiround_qa_template = dict(round=[
dict(role='HUMAN', prompt='</input>'),
dict(role='BOT', prompt='A1', end='\n'),
dict(role='HUMAN', prompt='Q1'),
dict(role='BOT', prompt='A2', end='\n\n'),
dict(role='HUMAN', prompt='Q2', begin='HUMAN:'),
dict(role='BOT', prompt='Answer: </answer>')
])
self.column_token_map = {
'input': '</input>',
'answer': '</answer>',
}
self.entry = {'input': 'Hello, how are you?', 'answer': 'Good.'}
def test_init(self):
template = 'Translate the following English text to French: {t}.'
column_token_map = {'input': '{t}'}
pt = PromptTemplate(template, column_token_map)
self.assertEqual(pt.template, template)
self.assertEqual(pt.column_token_map, column_token_map)
def test_generate_ice_item(self):
# Test simple prompt
template = 'Translate the following English text to French: {t}.'
column_token_map = {'input': '{t}'}
pt = PromptTemplate(template, column_token_map)
label = None
ice = pt.generate_ice_item(self.entry, label)
self.assertEqual(ice,
('Translate the following English text to French: '
'Hello, how are you?.'))
# test meta prompt style
pt = PromptTemplate(self.qa_template,
self.column_token_map,
ice_token='</E>')
label = None
ice = pt.generate_ice_item(self.entry, label)
ice_target = PromptList([
{
'section': 'ice',
'pos': 'begin'
},
dict(role='HUMAN', prompt='Hello, how are you?'),
dict(role='BOT', prompt='Answer: Good.'),
{
'section': 'ice',
'pos': 'end'
},
])
self.assertEqual(ice, ice_target)
# test_multiround
pt = PromptTemplate(self.multiround_qa_template,
self.column_token_map,
ice_token='</E>')
label = None
ice = pt.generate_ice_item(self.entry, label)
ice_target = PromptList([
{
'section': 'ice',
'pos': 'begin'
},
dict(role='HUMAN', prompt='Hello, how are you?'),
dict(role='BOT', prompt='A1', end='\n'),
dict(role='HUMAN', prompt='Q1'),
dict(role='BOT', prompt='A2', end='\n\n'),
dict(role='HUMAN', prompt='Q2', begin='HUMAN:'),
dict(role='BOT', prompt='Answer: Good.'),
{
'section': 'ice',
'pos': 'end'
},
])
self.assertEqual(ice, ice_target)
def test_generate_label_prompt_item(self):
# Test simple prompt
template = '</E> Translate the following English text to French: {t}.'
column_token_map = {'input': '{t}'}
pt = PromptTemplate(template, column_token_map, ice_token='</E>')
ice = 'ICE'
label = None
prompt = pt.generate_label_prompt_item(self.entry, ice, label)
self.assertEqual(
prompt, ('ICE Translate the following English text to French: '
'Hello, how are you?.'))
ice = PromptList([
{
'section': 'ice',
'pos': 'begin'
},
dict(role='HUMAN', prompt='h1'),
dict(role='BOT', prompt='b1'),
{
'section': 'ice',
'pos': 'end'
},
])
# test meta prompt style
pt = PromptTemplate(self.qa_template,
self.column_token_map,
ice_token='</E>')
label = None
prompt = pt.generate_label_prompt_item(self.entry, ice, label)
target = PromptList([
{
'section': 'begin',
'pos': 'begin'
},
dict(role='SYSTEM', fallback_role='HUMAN', prompt='instruct'),
{
'section': 'ice',
'pos': 'begin'
},
dict(role='HUMAN', prompt='h1'),
dict(role='BOT', prompt='b1'),
{
'section': 'ice',
'pos': 'end'
},
{
'section': 'begin',
'pos': 'end'
},
{
'section': 'round',
'pos': 'begin'
},
dict(role='HUMAN', prompt='Hello, how are you?'),
dict(role='BOT', prompt='Answer: Good.'),
{
'section': 'round',
'pos': 'end'
},
])
self.assertEqual(prompt, target)
# test_multiround
pt = PromptTemplate(self.multiround_qa_template,
self.column_token_map,
ice_token='</E>')
label = None
prompt = pt.generate_label_prompt_item(self.entry, ice, label)
target = PromptList([
{
'section': 'round',
'pos': 'begin'
},
dict(role='HUMAN', prompt='Hello, how are you?'),
dict(role='BOT', prompt='A1', end='\n'),
dict(role='HUMAN', prompt='Q1'),
dict(role='BOT', prompt='A2', end='\n\n'),
dict(role='HUMAN', prompt='Q2', begin='HUMAN:'),
dict(role='BOT', prompt='Answer: Good.'),
{
'section': 'round',
'pos': 'end'
},
])
self.assertEqual(prompt, target)
def test_generate_item(self):
# Test simple prompt
template = 'Translate the following English text to French: {t}.'
column_token_map = {'input': '{t}'}
pt = PromptTemplate(template, column_token_map)
item = pt.generate_item(self.entry)
self.assertEqual(item,
('Translate the following English text to French: '
'Hello, how are you?.'))
ice = PromptList([
{
'section': 'ice',
'pos': 'begin'
},
dict(role='HUMAN', prompt='h1'),
dict(role='BOT', prompt='b1'),
{
'section': 'ice',
'pos': 'end'
},
])
# test meta prompt (without system role)
pt = PromptTemplate(self.qa_template,
self.column_token_map,
ice_token='</E>')
prompt = pt.generate_item(self.entry, ice_field_replace_token=ice)
target = PromptList([
{
'section': 'begin',
'pos': 'begin'
},
dict(role='SYSTEM', fallback_role='HUMAN', prompt='instruct'),
{
'section': 'ice',
'pos': 'begin'
},
dict(role='HUMAN', prompt='h1'),
dict(role='BOT', prompt='b1'),
{
'section': 'ice',
'pos': 'end'
},
{
'section': 'begin',
'pos': 'end'
},
{
'section': 'round',
'pos': 'begin'
},
dict(role='HUMAN', prompt='Hello, how are you?'),
dict(role='BOT', prompt='Answer: Good.'),
{
'section': 'round',
'pos': 'end'
},
])
self.assertEqual(prompt, target)
pt = PromptTemplate(self.multiround_qa_template,
self.column_token_map,
ice_token='</E>')
prompt = pt.generate_item(self.entry, ice_field_replace_token=ice)
target = PromptList([
{
'section': 'round',
'pos': 'begin'
},
dict(role='HUMAN', prompt='Hello, how are you?'),
dict(role='BOT', prompt='A1', end='\n'),
dict(role='HUMAN', prompt='Q1'),
dict(role='BOT', prompt='A2', end='\n\n'),
dict(role='HUMAN', prompt='Q2', begin='HUMAN:'),
dict(role='BOT', prompt='Answer: Good.'),
{
'section': 'round',
'pos': 'end'
},
])
self.assertEqual(prompt, target)
import unittest
from opencompass.models.base_api import APITemplateParser
from opencompass.utils.prompt import PromptList
class TestAPITemplateParser(unittest.TestCase):
def setUp(self):
self.parser = APITemplateParser()
self.prompt = PromptList([
{
'section': 'begin',
'pos': 'begin'
},
'begin',
{
'role': 'SYSTEM',
'fallback_role': 'HUMAN',
'prompt': 'system msg'
},
{
'section': 'ice',
'pos': 'begin'
},
{
'role': 'HUMAN',
'prompt': 'U0'
},
{
'role': 'BOT',
'prompt': 'B0'
},
{
'section': 'ice',
'pos': 'end'
},
{
'section': 'begin',
'pos': 'end'
},
{
'section': 'round',
'pos': 'begin'
},
{
'role': 'HUMAN',
'prompt': 'U1'
},
{
'role': 'BOT',
'prompt': 'B1'
},
{
'role': 'HUMAN',
'prompt': 'U2'
},
{
'role': 'BOT',
'prompt': 'B2'
},
{
'section': 'round',
'pos': 'end'
},
{
'section': 'end',
'pos': 'begin'
},
'end',
{
'section': 'end',
'pos': 'end'
},
])
def test_parse_template_str_input(self):
prompt = self.parser.parse_template('Hello, world!', mode='gen')
self.assertEqual(prompt, 'Hello, world!')
prompt = self.parser.parse_template('Hello, world!', mode='ppl')
self.assertEqual(prompt, 'Hello, world!')
def test_parse_template_list_input(self):
prompt = self.parser.parse_template(['Hello', 'world'], mode='gen')
self.assertEqual(prompt, ['Hello', 'world'])
prompt = self.parser.parse_template(['Hello', 'world'], mode='ppl')
self.assertEqual(prompt, ['Hello', 'world'])
def test_parse_template_PromptList_input_no_meta_template(self):
prompt = self.parser.parse_template(self.prompt, mode='gen')
self.assertEqual(prompt,
'begin\nsystem msg\nU0\nB0\nU1\nB1\nU2\nB2\nend')
prompt = self.parser.parse_template(self.prompt, mode='ppl')
self.assertEqual(prompt,
'begin\nsystem msg\nU0\nB0\nU1\nB1\nU2\nB2\nend')
def test_parse_template_PromptList_input_with_meta_template(self):
parser = APITemplateParser(meta_template=dict(round=[
dict(role='HUMAN', api_role='HUMAN'),
dict(role='BOT', api_role='BOT', generate=True)
], ))
with self.assertWarns(Warning):
prompt = parser.parse_template(self.prompt, mode='gen')
self.assertEqual(
prompt,
PromptList([
{
'role': 'HUMAN',
'prompt': 'system msg\nU0'
},
{
'role': 'BOT',
'prompt': 'B0'
},
{
'role': 'HUMAN',
'prompt': 'U1'
},
{
'role': 'BOT',
'prompt': 'B1'
},
{
'role': 'HUMAN',
'prompt': 'U2'
},
]))
with self.assertWarns(Warning):
prompt = parser.parse_template(self.prompt, mode='ppl')
self.assertEqual(
prompt,
PromptList([
{
'role': 'HUMAN',
'prompt': 'system msg\nU0'
},
{
'role': 'BOT',
'prompt': 'B0'
},
{
'role': 'HUMAN',
'prompt': 'U1'
},
{
'role': 'BOT',
'prompt': 'B1'
},
{
'role': 'HUMAN',
'prompt': 'U2'
},
{
'role': 'BOT',
'prompt': 'B2'
},
]))
parser = APITemplateParser(meta_template=dict(
round=[
dict(role='HUMAN', api_role='HUMAN'),
dict(role='BOT', api_role='BOT', generate=True)
],
reserved_roles=[
dict(role='SYSTEM', api_role='SYSTEM'),
],
))
with self.assertWarns(Warning):
prompt = parser.parse_template(self.prompt, mode='gen')
self.assertEqual(
prompt,
PromptList([
{
'role': 'SYSTEM',
'prompt': 'system msg'
},
{
'role': 'HUMAN',
'prompt': 'U0'
},
{
'role': 'BOT',
'prompt': 'B0'
},
{
'role': 'HUMAN',
'prompt': 'U1'
},
{
'role': 'BOT',
'prompt': 'B1'
},
{
'role': 'HUMAN',
'prompt': 'U2'
},
]))
with self.assertWarns(Warning):
prompt = parser.parse_template(self.prompt, mode='ppl')
self.assertEqual(
prompt,
PromptList([
{
'role': 'SYSTEM',
'prompt': 'system msg'
},
{
'role': 'HUMAN',
'prompt': 'U0'
},
{
'role': 'BOT',
'prompt': 'B0'
},
{
'role': 'HUMAN',
'prompt': 'U1'
},
{
'role': 'BOT',
'prompt': 'B1'
},
{
'role': 'HUMAN',
'prompt': 'U2'
},
{
'role': 'BOT',
'prompt': 'B2'
},
]))
import unittest
from opencompass.models.base import LMTemplateParser
from opencompass.utils.prompt import PromptList
class TestLMTemplateParser(unittest.TestCase):
def setUp(self):
self.parser = LMTemplateParser()
self.prompt = PromptList([
{
'section': 'begin',
'pos': 'begin'
},
'begin',
{
'role': 'SYSTEM',
'fallback_role': 'HUMAN',
'prompt': 'system msg'
},
{
'section': 'ice',
'pos': 'begin'
},
{
'role': 'HUMAN',
'prompt': 'U0'
},
{
'role': 'BOT',
'prompt': 'B0'
},
{
'section': 'ice',
'pos': 'end'
},
{
'section': 'begin',
'pos': 'end'
},
{
'section': 'round',
'pos': 'begin'
},
{
'role': 'HUMAN',
'prompt': 'U1',
'end': '\n'
},
{
'role': 'BOT',
'prompt': 'B1'
},
{
'role': 'HUMAN',
'prompt': 'U2'
},
{
'role': 'BOT',
'prompt': 'B2'
},
{
'section': 'round',
'pos': 'end'
},
{
'section': 'end',
'pos': 'begin'
},
'end',
{
'section': 'end',
'pos': 'end'
},
])
def test_parse_template_str_input(self):
prompt = self.parser.parse_template('Hello, world!', mode='gen')
self.assertEqual(prompt, 'Hello, world!')
prompt = self.parser.parse_template('Hello, world!', mode='ppl')
self.assertEqual(prompt, 'Hello, world!')
def test_parse_template_list_input(self):
prompt = self.parser.parse_template(['Hello', 'world'], mode='gen')
self.assertEqual(prompt, ['Hello', 'world'])
prompt = self.parser.parse_template(['Hello', 'world'], mode='ppl')
self.assertEqual(prompt, ['Hello', 'world'])
def test_parse_template_PromptList_input_no_meta_template(self):
prompt = self.parser.parse_template(self.prompt, mode='gen')
self.assertEqual(prompt,
'begin\nsystem msg\nU0\nB0\nU1\nB1\nU2\nB2\nend')
prompt = self.parser.parse_template(self.prompt, mode='ppl')
self.assertEqual(prompt,
'begin\nsystem msg\nU0\nB0\nU1\nB1\nU2\nB2\nend')
def test_parse_template_PromptList_input_with_meta_template(self):
# no SYSTEM role, early generation in THOUGHTS
parser = LMTemplateParser(meta_template=dict(
begin='meta instruction\n',
round=[
dict(role='HUMAN', begin='<|HUMAN|>:', end='<eoh>\n'),
dict(role='THOUGHTS',
begin='<|Inner Thoughts|>:',
generate=True,
end='<eot>\n',
prompt='None'),
dict(role='BOT', begin='<|BOT|>:', end='<eob>\n'),
],
end='meta end',
))
prompt = parser.parse_template(self.prompt, mode='gen')
target = ('meta instruction\n'
'begin'
'<|HUMAN|>:system msg<eoh>\n'
'<|HUMAN|>:U0<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B0<eob>\n'
'<|HUMAN|>:U1\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B1<eob>\n'
'<|HUMAN|>:U2<eoh>\n'
'<|Inner Thoughts|>:')
self.assertEqual(prompt, target)
prompt = parser.parse_template(self.prompt, mode='ppl')
target = ('meta instruction\n'
'begin'
'<|HUMAN|>:system msg<eoh>\n'
'<|HUMAN|>:U0<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B0<eob>\n'
'<|HUMAN|>:U1\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B1<eob>\n'
'<|HUMAN|>:U2<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B2<eob>\n'
'end'
'meta end')
self.assertEqual(prompt, target)
# no SYSTEM role, generation in BOT
parser = LMTemplateParser(meta_template=dict(
begin='meta instruction\n',
round=[
dict(role='HUMAN', begin='<|HUMAN|>:', end='<eoh>\n'),
dict(role='THOUGHTS',
begin='<|Inner Thoughts|>:',
end='<eot>\n',
prompt='None'),
dict(
role='BOT', begin='<|BOT|>:', end='<eob>\n',
generate=True),
],
end='meta end',
))
prompt = parser.parse_template(self.prompt, mode='gen')
target = ('meta instruction\n'
'begin'
'<|HUMAN|>:system msg<eoh>\n'
'<|HUMAN|>:U0<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B0<eob>\n'
'<|HUMAN|>:U1\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B1<eob>\n'
'<|HUMAN|>:U2<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:')
self.assertEqual(prompt, target)
prompt = parser.parse_template(self.prompt, mode='ppl')
target = ('meta instruction\n'
'begin'
'<|HUMAN|>:system msg<eoh>\n'
'<|HUMAN|>:U0<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B0<eob>\n'
'<|HUMAN|>:U1\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B1<eob>\n'
'<|HUMAN|>:U2<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B2<eob>\n'
'end'
'meta end')
self.assertEqual(prompt, target)
# with SYSTEM role, generation in BOT
parser = LMTemplateParser(meta_template=dict(
begin='meta instruction\n',
round=[
dict(role='HUMAN', begin='<|HUMAN|>:', end='<eoh>\n'),
dict(role='THOUGHTS',
begin='<|Inner Thoughts|>:',
end='<eot>\n',
prompt='None'),
dict(
role='BOT', begin='<|BOT|>:', end='<eob>\n',
generate=True),
],
end='meta end',
reserved_roles=[
dict(role='SYSTEM', begin='<|SYSTEM|>:', end='<eos>\n')
]))
prompt = parser.parse_template(self.prompt, mode='gen')
target = ('meta instruction\n'
'begin'
'<|SYSTEM|>:system msg<eos>\n'
'<|HUMAN|>:U0<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B0<eob>\n'
'<|HUMAN|>:U1\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B1<eob>\n'
'<|HUMAN|>:U2<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:')
self.assertEqual(prompt, target)
prompt = parser.parse_template(self.prompt, mode='ppl')
target = ('meta instruction\n'
'begin'
'<|SYSTEM|>:system msg<eos>\n'
'<|HUMAN|>:U0<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B0<eob>\n'
'<|HUMAN|>:U1\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B1<eob>\n'
'<|HUMAN|>:U2<eoh>\n'
'<|Inner Thoughts|>:None<eot>\n'
'<|BOT|>:B2<eob>\n'
'end'
'meta end')
self.assertEqual(prompt, target)
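# The test classes above have no __main__ guard, so they are meant to be picked
# up by a test runner. A minimal sketch, assuming the files live under a
# `tests/` directory (the path is an assumption, not stated in this commit):
#
#   python -m pytest tests/ -k "PromptTemplate or TemplateParser"
#   python -m unittest discover -s tests -p "test_*.py"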
import argparse
import copy
import json
import os.path as osp
import mmengine
from mmengine.config import Config, ConfigDict
from mmengine.utils import mkdir_or_exist
from tqdm import tqdm
from opencompass.registry import TEXT_POSTPROCESSORS
from opencompass.utils import build_dataset_from_cfg, get_infer_output_path
def parse_args():
parser = argparse.ArgumentParser(description='Run an evaluation task')
    parser.add_argument('config', help='Config file path')
parser.add_argument(
'-f',
'--force',
help='Force to run the task even if the results already exist',
action='store_true',
default=False)
parser.add_argument('-w',
'--work-dir',
help='Work path, all the outputs will be '
'saved in this path, including the slurm logs, '
                        'the evaluation results, the summary results, etc. '
'If not specified, the work_dir will be set to '
'./outputs/default.',
default=None,
type=str)
args = parser.parse_args()
return args
class BadcaseShower:
""""""
def __init__(self, cfg: ConfigDict) -> None:
self.cfg = cfg
self.model_cfg = copy.deepcopy(self.cfg['model'])
self.dataset_cfg = copy.deepcopy(self.cfg['dataset'])
self.work_dir = self.cfg.get('work_dir')
# Load Dataset
self.eval_cfg = self.dataset_cfg.get('eval_cfg')
self.ds_split = self.eval_cfg.get('ds_split', None)
self.ds_column = self.eval_cfg.get('ds_column')
def run(self):
filename = get_infer_output_path(
self.model_cfg, self.dataset_cfg,
osp.join(self.work_dir, 'predictions'))
root, ext = osp.splitext(filename)
partial_filename = root + '_0' + ext
if not osp.exists(osp.realpath(filename)) and not osp.exists(
osp.realpath(partial_filename)):
print(f'{filename} not found')
return
dataset = build_dataset_from_cfg(self.dataset_cfg)
# Postprocess dataset if necessary
if 'dataset_postprocessor' in self.eval_cfg:
def postprocess(sample):
s = sample[self.ds_column]
proc = TEXT_POSTPROCESSORS.get(
self.eval_cfg['dataset_postprocessor']['type'])
sample[self.ds_column] = proc(s)
return sample
dataset = dataset.map(postprocess)
# Load predictions
if osp.exists(osp.realpath(filename)):
preds = mmengine.load(filename)
else:
filename = partial_filename
preds, offset = {}, 0
i = 1
while osp.exists(osp.realpath(filename)):
_preds = mmengine.load(filename)
filename = root + f'_{i}' + ext
i += 1
for _o in range(len(_preds)):
preds[str(offset)] = _preds[str(_o)]
offset += 1
pred_strs = [preds[str(i)]['prediction'] for i in range(len(preds))]
# Postprocess predictions if necessary
if 'pred_postprocessor' in self.eval_cfg:
proc = TEXT_POSTPROCESSORS.get(
self.eval_cfg['pred_postprocessor']['type'])
pred_strs = [proc(s) for s in pred_strs]
if self.ds_split:
references = dataset[self.ds_split][self.ds_column]
else:
references = dataset[self.ds_column]
if len(pred_strs) != len(references):
            print(f'length mismatch: {len(pred_strs)} predictions vs '
                  f'{len(references)} references')
return
# combine cases
allcase, badcase = [], []
if 'in-context examples' in preds['0']:
            # ppl eval: each prediction records per-label prompts and PPL values
for i, (pred_str,
reference) in enumerate(zip(tqdm(pred_strs), references)):
ref_str = str(reference)
try:
pred_prompt = preds[str(i)]['label: ' +
pred_str]['testing input']
pred_PPL = preds[str(i)]['label: ' + pred_str]['PPL']
ref_prompt = preds[str(i)]['label: ' +
ref_str]['testing input']
ref_PPL = preds[str(i)]['label: ' + ref_str]['PPL']
except KeyError:
continue
item = {
'prediction_prompt': pred_prompt,
'prediction': pred_str,
'prediction_PPL': pred_PPL,
'reference_prompt': ref_prompt,
'reference': ref_str,
'reference_PPL': ref_PPL
}
if pred_str != ref_str:
badcase.append(item)
allcase.append(item)
else:
allcase.append(item)
else:
            # gen eval: each prediction records the origin prompt and generated text
for i, (pred_str,
reference) in enumerate(zip(tqdm(pred_strs), references)):
ref_str = str(reference)
origin_prompt = preds[str(i)]['origin_prompt']
item = {
'origin_prompt': origin_prompt,
'prediction': pred_str,
'reference': ref_str
}
# FIXME: we now consider all cases as bad cases
badcase.append(item)
allcase.append(item)
# Save result
out_path = get_infer_output_path(
self.cfg['model'], self.cfg['dataset'],
osp.join(self.work_dir, 'case_analysis/bad'))
mkdir_or_exist(osp.split(out_path)[0])
with open(out_path, 'w', encoding='utf-8') as f:
json.dump(badcase, f, indent=4, ensure_ascii=False)
out_path = get_infer_output_path(
self.cfg['model'], self.cfg['dataset'],
osp.join(self.work_dir, 'case_analysis/all'))
mkdir_or_exist(osp.split(out_path)[0])
with open(out_path, 'w', encoding='utf-8') as f:
json.dump(allcase, f, indent=4, ensure_ascii=False)
def dispatch_tasks(cfg, force=False):
for model in cfg['models']:
for dataset in cfg['datasets']:
if force or not osp.exists(
get_infer_output_path(
model, dataset,
osp.join(cfg['work_dir'], 'case_analysis/all'))):
BadcaseShower({
'model': model,
'dataset': dataset,
'work_dir': cfg['work_dir']
}).run()
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
# set work_dir
if args.work_dir is not None:
cfg['work_dir'] = args.work_dir
else:
cfg.setdefault('work_dir', './outputs/default')
dispatch_tasks(cfg, force=args.force)
if __name__ == '__main__':
main()
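# A minimal usage sketch for this case-analysis tool; the script name
# `case_analysis.py` is an illustrative assumption:
#
#   python case_analysis.py configs/eval_demo.py -w outputs/default/20230516_144254
#   python case_analysis.py configs/eval_demo.py -w outputs/default/20230516_144254 -f
#
# For every (model, dataset) pair it dumps JSON files under
# <work_dir>/case_analysis/bad and <work_dir>/case_analysis/all.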
import argparse
import getpass
import os
import os.path as osp
from datetime import datetime
from mmengine.config import Config
from opencompass.registry import PARTITIONERS, RUNNERS
from opencompass.runners import SlurmRunner
from opencompass.utils import LarkReporter, Summarizer, get_logger
def parse_args():
parser = argparse.ArgumentParser(description='Run an evaluation task')
    parser.add_argument('config', help='Config file path')
parser.add_argument('-p',
'--partition',
help='Slurm partition name',
default=None,
type=str)
parser.add_argument('-q',
'--quotatype',
help='Slurm quota type',
default='auto',
type=str)
parser.add_argument('--debug',
                        help='Debug mode, in which the scheduler will run '
                        'tasks in a single process, and output will not be '
                        'redirected to files',
action='store_true',
default=False)
parser.add_argument('-m',
'--mode',
help='Running mode. You can choose "infer" if you '
'only want the inference results, or "eval" if you '
'already have the results and want to evaluate them, '
'or "viz" if you want to visualize the results.',
choices=['all', 'infer', 'eval', 'viz'],
default='all',
type=str)
parser.add_argument('-r',
'--reuse',
nargs='?',
type=str,
const='latest',
                        help='Reuse previous outputs & results, and run any '
                        'missing jobs present in the config. If its '
                        'argument is not specified, the latest results in '
                        'the work_dir will be reused. The argument can '
                        'also be a specific timestamp, e.g. 20230516_144254')
parser.add_argument('-w',
'--work-dir',
help='Work path, all the outputs will be '
'saved in this path, including the slurm logs, '
                        'the evaluation results, the summary results, etc. '
'If not specified, the work_dir will be set to '
'./outputs/default.',
default=None,
type=str)
parser.add_argument('-l',
'--lark',
help='Report the running status to lark bot',
action='store_true',
default=False)
args = parser.parse_args()
return args
def main():
args = parse_args()
# initialize logger
logger = get_logger(log_level='DEBUG' if args.debug else 'INFO')
cfg = Config.fromfile(args.config)
if args.work_dir is not None:
cfg['work_dir'] = args.work_dir
else:
cfg.setdefault('work_dir', './outputs/default/')
# cfg_time_str defaults to the current time
cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S')
if args.reuse:
if args.reuse == 'latest':
dirs = os.listdir(cfg.work_dir)
assert len(dirs) > 0, 'No previous results to reuse!'
dir_time_str = sorted(dirs)[-1]
else:
dir_time_str = args.reuse
        logger.info(f'Reusing experiments from {dir_time_str}')
elif args.mode in ['eval', 'viz']:
raise ValueError('You must specify -r or --reuse when running in eval '
'or viz mode!')
# update "actual" work_dir
cfg['work_dir'] = osp.join(cfg.work_dir, dir_time_str)
os.makedirs(osp.join(cfg.work_dir, 'configs'), exist_ok=True)
# dump config
output_config_path = osp.join(cfg.work_dir, 'configs',
f'{cfg_time_str}.py')
cfg.dump(output_config_path)
    # Config is intentionally reloaded here to avoid the issue that
    # initialized types cannot be serialized
cfg = Config.fromfile(output_config_path)
    # report to lark bot if --lark is specified
if not args.lark:
cfg['lark_bot_url'] = None
elif cfg.get('lark_bot_url', None):
        content = f'{getpass.getuser()}\'s new task has been launched!'
LarkReporter(cfg['lark_bot_url']).post(content)
if cfg.get('infer', None) is not None and args.mode in ['all', 'infer']:
if args.partition is not None:
if RUNNERS.get(cfg.infer.runner.type) == SlurmRunner:
cfg.infer.runner.partition = args.partition
cfg.infer.runner.quotatype = args.quotatype
else:
logger.warning('SlurmRunner is not used, so the partition '
'argument is ignored.')
if args.debug:
cfg.infer.runner.debug = True
if args.lark:
cfg.infer.runner.lark_bot_url = cfg['lark_bot_url']
cfg.infer.partitioner['out_dir'] = osp.join(cfg['work_dir'],
'predictions/')
partitioner = PARTITIONERS.build(cfg.infer.partitioner)
tasks = partitioner(cfg)
runner = RUNNERS.build(cfg.infer.runner)
runner(tasks)
# evaluate
if cfg.get('eval', None) is not None and args.mode in ['all', 'eval']:
if args.partition is not None:
            if RUNNERS.get(cfg.eval.runner.type) == SlurmRunner:
cfg.eval.runner.partition = args.partition
cfg.eval.runner.quotatype = args.quotatype
else:
logger.warning('SlurmRunner is not used, so the partition '
'argument is ignored.')
if args.debug:
cfg.eval.runner.debug = True
if args.lark:
cfg.eval.runner.lark_bot_url = cfg['lark_bot_url']
cfg.eval.partitioner['out_dir'] = osp.join(cfg['work_dir'], 'results/')
partitioner = PARTITIONERS.build(cfg.eval.partitioner)
tasks = partitioner(cfg)
runner = RUNNERS.build(cfg.eval.runner)
runner(tasks)
# visualize
if args.mode in ['all', 'eval', 'viz']:
summarizer = Summarizer(cfg)
summarizer.summarize(time_str=cfg_time_str)
if __name__ == '__main__':
main()
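# A minimal usage sketch for this registry-driven launcher, which builds the
# partitioner/runner declared under `infer` and `eval` in the config; the
# script name `run_cfg.py` is an illustrative assumption:
#
#   python run_cfg.py configs/eval_demo.py
#   python run_cfg.py configs/eval_demo.py -p my_partition -q auto  # only honored for SlurmRunner
#   python run_cfg.py configs/eval_demo.py -r 20230516_144254 -m viz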
import argparse
import fnmatch
from typing import Dict
from mmengine.config import Config, ConfigDict
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
from opencompass.registry import ICL_PROMPT_TEMPLATES, ICL_RETRIEVERS
from opencompass.utils import (Menu, build_dataset_from_cfg,
build_model_from_cfg, dataset_abbr_from_cfg,
model_abbr_from_cfg)
def parse_args():
parser = argparse.ArgumentParser(description='Run an evaluation task')
    parser.add_argument('config', help='Config file path')
parser.add_argument('-n', '--non-interactive', action='store_true')
parser.add_argument('-a', '--all', action='store_true')
parser.add_argument('-p',
'--pattern',
type=str,
help='To match the dataset abbr.')
args = parser.parse_args()
return args
def parse_model_cfg(model_cfg: ConfigDict) -> Dict[str, ConfigDict]:
model2cfg = {}
for model in model_cfg:
model2cfg[model_abbr_from_cfg(model)] = model
return model2cfg
def parse_dataset_cfg(dataset_cfg: ConfigDict) -> Dict[str, ConfigDict]:
dataset2cfg = {}
for dataset in dataset_cfg:
dataset2cfg[dataset_abbr_from_cfg(dataset)] = dataset
return dataset2cfg
def print_prompts(model_cfg, dataset_cfg):
# TODO: A really dirty method that copies code from PPLInferencer and
# GenInferencer. In the future, the prompt extraction code should be
# extracted and generalized as a static method in these Inferencers
# and reused here.
if model_cfg:
max_seq_len = model_cfg.max_seq_len
if not model_cfg['type'].is_api:
model_cfg['tokenizer_only'] = True
model = build_model_from_cfg(model_cfg)
else:
max_seq_len = None
model = None
infer_cfg = dataset_cfg.get('infer_cfg')
fix_id_list = infer_cfg.inferencer.get('fix_id_list', [])
dataset = build_dataset_from_cfg(dataset_cfg)
ice_template = None
if hasattr(infer_cfg, 'ice_template'):
ice_template = ICL_PROMPT_TEMPLATES.build(infer_cfg['ice_template'])
prompt_template = None
if hasattr(infer_cfg, 'prompt_template'):
prompt_template = ICL_PROMPT_TEMPLATES.build(
infer_cfg['prompt_template'])
infer_cfg['retriever']['dataset'] = dataset
retriever = ICL_RETRIEVERS.build(infer_cfg['retriever'])
if fix_id_list:
ice_idx_list = retriever.retrieve(fix_id_list)
else:
ice_idx_list = retriever.retrieve()
assert infer_cfg.inferencer.type in [PPLInferencer, GenInferencer], \
'Only PPLInferencer and GenInferencer are supported'
if infer_cfg.inferencer.type == PPLInferencer:
labels = retriever.get_labels(ice_template=ice_template,
prompt_template=prompt_template)
ice = [
retriever.generate_ice(ice_idx_list[idx],
ice_template=ice_template)
for idx in range(len(ice_idx_list))
]
print('-' * 100)
print('ICE Template:')
print('-' * 100)
print(ice[0])
print('-' * 100)
for label in labels:
idx = 0
prompt = retriever.generate_label_prompt(
idx,
ice[idx],
label,
ice_template=ice_template,
prompt_template=prompt_template,
remain_sep=None)
if max_seq_len is not None:
prompt_token_num = model.get_token_len_from_template(prompt)
while len(ice_idx_list[idx]
) > 0 and prompt_token_num > max_seq_len:
num_ice = len(ice_idx_list[idx])
print(f'Truncating ice {num_ice} -> {num_ice - 1}',
f'Number of tokens: {prompt_token_num} -> ...')
ice_idx_list[idx] = ice_idx_list[idx][:-1]
ice[idx] = retriever.generate_ice(
ice_idx_list[idx], ice_template=ice_template)
prompt = retriever.generate_label_prompt(
idx,
ice[idx],
label,
ice_template=ice_template,
prompt_template=prompt_template)
prompt_token_num = model.get_token_len_from_template(
prompt)
print(f'Number of tokens: {prompt_token_num}')
if model is not None:
prompt = model.parse_template(prompt, mode='ppl')
print('-' * 100)
print(f'Label: {label}')
print('Sample prompt:')
print('-' * 100)
print(prompt)
print('-' * 100)
elif infer_cfg.inferencer.type == GenInferencer:
idx, ice_idx = 0, ice_idx_list[0]
ice = retriever.generate_ice(ice_idx, ice_template=ice_template)
prompt = retriever.generate_prompt_for_generate_task(
idx,
ice,
gen_field_replace_token=infer_cfg.inferencer.get(
'gen_field_replace_token', ''),
ice_template=ice_template,
prompt_template=prompt_template)
if max_seq_len is not None:
prompt_token_num = model.get_token_len_from_template(prompt)
while len(ice_idx) > 0 and prompt_token_num > max_seq_len:
num_ice = len(ice_idx)
print(f'Truncating ice {num_ice} -> {num_ice - 1}',
f'Number of tokens: {prompt_token_num} -> ...')
ice_idx = ice_idx[:-1]
ice = retriever.generate_ice(ice_idx,
ice_template=ice_template)
prompt = retriever.generate_prompt_for_generate_task(
idx,
ice,
gen_field_replace_token=infer_cfg.inferencer.get(
'gen_field_replace_token', ''),
ice_template=ice_template,
prompt_template=prompt_template)
prompt_token_num = model.get_token_len_from_template(prompt)
print(f'Number of tokens: {prompt_token_num}')
if model is not None:
prompt = model.parse_template(prompt, mode='gen')
print('-' * 100)
print('Sample prompt:')
print('-' * 100)
print(prompt)
print('-' * 100)
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
model2cfg = parse_model_cfg(cfg.models) if 'models' in cfg else {
'None': None
}
if 'datasets' in cfg:
dataset2cfg = parse_dataset_cfg(cfg.datasets)
else:
dataset2cfg = {}
for key in cfg.keys():
if key.endswith('_datasets'):
dataset2cfg.update(parse_dataset_cfg(cfg[key]))
if args.pattern is not None:
matches = fnmatch.filter(dataset2cfg, args.pattern)
if len(matches) == 0:
raise ValueError(
                'No dataset matches the pattern. Please select from: \n' +
'\n'.join(dataset2cfg.keys()))
dataset2cfg = {k: dataset2cfg[k] for k in matches}
if not args.all:
if not args.non_interactive:
model, dataset = Menu(
[list(model2cfg.keys()),
list(dataset2cfg.keys())], [
f'Please make a selection of {s}:'
for s in ['model', 'dataset']
]).run()
else:
model = list(model2cfg.keys())[0]
dataset = list(dataset2cfg.keys())[0]
model_cfg = model2cfg[model]
dataset_cfg = dataset2cfg[dataset]
print_prompts(model_cfg, dataset_cfg)
else:
for model_abbr, model_cfg in model2cfg.items():
for dataset_abbr, dataset_cfg in dataset2cfg.items():
print('=' * 64, '[BEGIN]', '=' * 64)
print(f'[MODEL]: {model_abbr}')
print(f'[DATASET]: {dataset_abbr}')
print('---')
print_prompts(model_cfg, dataset_cfg)
print('=' * 65, '[END]', '=' * 65)
print()
if __name__ == '__main__':
main()
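# A minimal usage sketch for this prompt viewer; the script name
# `prompt_viewer.py` and the dataset pattern are illustrative assumptions:
#
#   python prompt_viewer.py configs/eval_demo.py        # choose model/dataset from a menu
#   python prompt_viewer.py configs/eval_demo.py -n     # non-interactive, take the first entries
#   python prompt_viewer.py configs/eval_demo.py -a -p "*law*"   # print prompts for all matches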
import argparse
from typing import Dict
from mmengine.config import Config, ConfigDict
from opencompass.utils import Menu, build_model_from_cfg, model_abbr_from_cfg
from opencompass.utils.prompt import PromptList
test_prompts = [
PromptList([
{
'section': 'begin',
'pos': 'begin'
},
{
'role':
'SYSTEM',
'fallback_role':
'HUMAN',
'prompt':
'The following are multiple choice questions (with answers) about professional law.' # noqa
},
'',
{
'section': 'ice',
'pos': 'begin'
},
{
'role':
'HUMAN',
'prompt':
"Without a warrant, police officers searched the garbage cans in the alley behind a man's house and discovered chemicals used to make methamphetamine, as well as cooking utensils and containers with the man's fingerprints on them. The alley was a public thoroughfare maintained by the city, and the garbage was picked up once a week by a private sanitation company. The items were found inside the garbage cans in plastic bags that had been tied closed and further secured with tape. The man was charged in federal court with the manufacture of methamphetamine. Did the search of the garbage cans violate the Fourth Amendment?\nA. No, because the man had no reasonable expectation of privacy in garbage left in the alley.\nB. No, because the probative value of the evidence outweighs the man's modest privacy claims in his garbage.\nC. Yes, because the alley was within the curtilage of the man's home and entry without a warrant was unconstitutional.\nD. Yes, because there is a reasonable expectation of privacy in one's secured garbage containers.\nAnswer: " # noqa
},
{
'role': 'BOT',
'prompt': 'A\n'
},
{
'section': 'ice',
'pos': 'end'
},
{
'section': 'ice',
'pos': 'begin'
},
{
'role':
'HUMAN',
'prompt':
'A man borrowed $500,000 from a bank, securing the loan with a mortgage on a commercial building he owned. The mortgage provided as follows: "No prepayment may be made on this loan during the first two years after the date of this mortgage. Thereafter, prepayment may be made in any amount at any time but only if accompanied by a prepayment fee of 5% of the amount prepaid." One year later, the man received an unexpected cash gift of $1 million and wished to pay off the $495,000 principal balance still owed on the loan. $495,000 principal balance still owed on the loan. Concerned that the bank might refuse prepayment, despite a rise in market interest rates in the year since the loan was made, or at least insist on the 5% prepayment fee, the man consulted an attorney concerning the enforceability of the above-quoted clause. There is no applicable statute. What is the attorney likely to say? \nA. The entire clause is unenforceable, because it violates a public policy favoring the prompt and early repayment of debt.\nB. The entire clause is unenforceable, because the rise in interest rates will allow the bank to reloan the funds without loss.\nC. The two-year prepayment prohibition and the prepayment fee provision are both valid and enforceable.\nD. The two-year prepayment prohibition is unenforceable, but the prepayment fee provision is enforceable.\nAnswer: ' # noqa
},
{
'role': 'BOT',
'prompt': 'D\n'
},
{
'section': 'ice',
'pos': 'end'
},
{
'section': 'ice',
'pos': 'begin'
},
{
'role':
'HUMAN',
'prompt':
"A woman and a defendant entered into an arrangement where the woman promised to pay the defendant $10,000 to act as a surrogate mother. In return, the defendant agreed to be implanted with the woman's embryo and carry the baby to term. The woman paid the defendant the $10,000 upfront. During the seventh month of the pregnancy, the defendant changed her mind and decided to keep the child herself. The defendant moved out of state and gave birth to the baby, which she refuses to turn over to the woman. The defendant is guilty of\nA. no crime.\nB. embezzlement.\nC. kidnapping.\nD. false pretenses.\nAnswer: " # noqa
},
{
'role': 'BOT',
'prompt': 'A\n'
},
{
'section': 'ice',
'pos': 'end'
},
{
'section': 'ice',
'pos': 'begin'
},
{
'role':
'HUMAN',
'prompt':
"A rescuer was driving on an isolated portion of a country road. His headlights caught a figure lying at the side of the road. The rescuer stopped to investigate and found a victim, who was bleeding from head wounds and appeared to have been severely beaten. The rescuer then lifted the victim into his car and drove her to the hospital, a half-hour trip. When they arrived at the hospital, the rescuer carried the victim into the emergency room. He left her with a nurse and then returned home. Although the victim recovered from her injuries, she sued the hospital for malpractice, claiming that she was not promptly given medical attention. At trial, the nurse proposes to testify that when the victim was first brought to the hospital, she was unconscious. The victim's attorney objects and moves to strike the nurse's testimony. The trial judge should\nA. sustain the objection, because it goes to an ultimate issue in the case. \nB. sustain the objection, because the nurse is not qualified to render an expert opinion. \nC. overrule the objection, because it is a shorthand rendition of what she observed. \nD. overrule the objection, because there are independent grounds to show a present sense impression. \nAnswer: " # noqa
},
{
'role': 'BOT',
'prompt': 'C\n'
},
{
'section': 'ice',
'pos': 'end'
},
{
'section': 'ice',
'pos': 'begin'
},
{
'role':
'HUMAN',
'prompt':
"A young woman who attended a rock concert at a nightclub was injured when the band opened its performance with illegal fireworks that ignited foam insulation in the club's ceiling and walls. The young woman sued the radio station that sponsored the performance. The radio station has moved for summary judgment, claiming that it owed no duty to audience members. The evidence has established the following facts: The station advertised its sponsorship on the radio and in print, distributed free tickets to the concert, and in print, distributed free tickets to the concert, staffed the event with the station's interns to assist with crowd control, and provided a station disc jockey to serve as master of ceremonies. The master of ceremonies had the authority to stop or delay the performance at any time on the basis of any safety concern. The station knew or should have known that the band routinely used unlicensed, illegal fireworks in its performances. Should the court grant the radio station's motion for summary judgment? \nA. No, because there is sufficient evidence of knowledge and control on the part of the station to impose on it a duty of care to audience members.\nB. No, because under respondeat superior, the radio station is vicariously liable for the negligent actions of the band.\nC. Yes, because it is the band and the nightclub owners who owed audience members a duty of care.\nD. Yes, because the conduct of the band in setting off illegal fireworks was criminal and setting off illegal fireworks was criminal and was a superseding cause as a matter of law.\nAnswer: " # noqa
},
{
'role': 'BOT',
'prompt': 'A\n'
},
{
'section': 'ice',
'pos': 'end'
},
'\n',
'',
{
'section': 'begin',
'pos': 'end'
},
{
'section': 'round',
'pos': 'begin'
},
{
'role':
'HUMAN',
'prompt':
'A state statute provides: "Whenever a person knows or should know that he (or she) is being arrested by a police officer, it is the duty of such person to refrain from using force or any weapon in resisting arrest. " Violation of the statute is made punishable by fine and/or imprisonment. One morning, there was a bank robbery in the state. That afternoon, a police officer arrested a suspect who he believed was involved in the crime. However, the police officer and the suspect have given different accounts concerning what happened next. According to the police officer, after the suspect was apprehended, he resisted arrest and hit the police officer in the mouth with his fist. The police officer, who was momentarily stunned, pulled out his nightstick and struck the suspect over the head with it. On the other hand, the suspect claimed that after he was arrested, he cursed at the policeman, whereupon the police officer began hitting the suspect with his nightstick. To avoid being hit again, the suspect hit the police officer with his fist, knocking him down. The suspect was charged with assault. The suspect should be found\nA. not guilty, if the arrest was unlawful without probable cause and the jury believes the suspect\'s account.\nB. not guilty, if the arrest was lawful, provided that the jury believes the suspect\'s account.\nC. guilty, if the arrest was lawful, regardless which account the jury believes.\nD. guilty, if the arrest was unlawful, regardless which account the jury believes.\nAnswer: ' # noqa
},
{
'section': 'round',
'pos': 'end'
}
]),
'Hello! How are you?'
]
meta_templates = [
None,
dict(round=[
dict(role='HUMAN', api_role='HUMAN'),
dict(role='BOT', api_role='BOT', generate=True)
], ),
dict(
round=[
dict(role='HUMAN', api_role='HUMAN'),
dict(role='BOT', api_role='BOT', generate=True)
],
reserved_roles=[
dict(role='SYSTEM', api_role='SYSTEM'),
],
)
]
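# The three entries above cover: (1) no meta template, (2) a plain HUMAN/BOT
# round template, and (3) a round template with a reserved SYSTEM role; these
# are the same configurations exercised by the APITemplateParser unit tests.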
def test_model(model_cfg: ConfigDict):
for meta_template in meta_templates:
print('Testing meta_template: ', meta_template)
model_cfg['meta_template'] = meta_template
model = build_model_from_cfg(model_cfg)
print('Prompt 0 length:',
model.get_token_len_from_template(test_prompts[0]))
print('Prompt 1 length:',
model.get_token_len_from_template(test_prompts[1]))
print('Prompt lengths: ',
model.get_token_len_from_template(test_prompts))
msgs = model.generate_from_template(test_prompts, max_out_len=100)
print('Prompt 0 response:', msgs[0])
print('Prompt 1 response:', msgs[1])
print('-' * 100)
def parse_args():
parser = argparse.ArgumentParser(
description='Test if a given API model wrapper works properly')
    parser.add_argument('config', help='Config file path')
parser.add_argument('-n', '--non-interactive', action='store_true')
args = parser.parse_args()
return args
def parse_model_cfg(model_cfg: ConfigDict) -> Dict[str, ConfigDict]:
model2cfg = {}
for model in model_cfg:
model2cfg[model_abbr_from_cfg(model)] = model
return model2cfg
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if 'models' not in cfg:
raise ValueError('No "models" specified in config file!')
model2cfg = parse_model_cfg(cfg.models)
if not args.non_interactive and len(model2cfg) > 1:
model = Menu([list(model2cfg.keys())],
['Please make a selection of models:']).run()
else:
model = list(model2cfg.keys())[0]
model_cfg = model2cfg[model]
test_model(model_cfg)
if __name__ == '__main__':
main()
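# A minimal usage sketch for this API-model smoke test; the script and config
# names are illustrative assumptions:
#
#   python test_api_model.py configs/eval_api_demo.py
#   python test_api_model.py configs/eval_api_demo.py -n   # skip the model menu
#
# The selected model is run over the two test prompts under every
# meta_template above, printing token lengths and generated responses.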