Commit 5e75e297 authored by Jingming, committed by GitHub

[Feature] Add multi-prompt generation demo (#568)



* [Feature] Add multi-prompt generation demo

* [Fix] change format in winogrande_gen_XXX.py

* [Fix] make multi-prompt demo more direct

* [Fix] fix bug

* [Fix] minor fix

---------
Co-authored-by: yingfhu <yingfhu@gmail.com>
parent 91fba2c2
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import winograndeDataset_V2
from opencompass.utils.text_postprocessors import first_option_postprocess

winogrande_reader_cfg = dict(
    input_columns=["opt1", "opt2"],
    output_column="answer",
)

winogrande_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
)

# Three paraphrases of the same binary-choice question; each one becomes its
# own dataset config below so that prompt sensitivity can be measured.
_winogrande_prompt = dict(
    prompt_1="Which of the following is a good sentence:\nA. {opt1}\nB. {opt2}\nAnswer:",
    prompt_2="Which is a good sentence out of the following:\nA. {opt1}\nB. {opt2}\nAnswer:",
    prompt_3="Can you identify a good sentence from the following:\nA. {opt1}\nB. {opt2}\nAnswer:",
)

winogrande_datasets = []
for _choice in _winogrande_prompt:
    winogrande_datasets.append(
        dict(
            abbr='winogrande_' + _choice,
            type=winograndeDataset_V2,
            path="./data/winogrande",
            reader_cfg=winogrande_reader_cfg,
            infer_cfg=dict(
                prompt_template=dict(
                    type=PromptTemplate,
                    template=dict(round=[
                        dict(
                            role="HUMAN",
                            prompt=_winogrande_prompt[_choice],
                        ),
                    ]),
                ),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=GenInferencer),
            ),
            eval_cfg=winogrande_eval_cfg,
        ),
    )

del _choice
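For reference, the loop above yields one dataset config per prompt key, so the three variants can be evaluated and later summarized together. A minimal sanity check (not part of the commit, and assuming this config module has just been executed) would be:

# Hypothetical sanity check of the expansion above: one config per prompt variant.
expected_abbrs = ['winogrande_prompt_1', 'winogrande_prompt_2', 'winogrande_prompt_3']
assert [d['abbr'] for d in winogrande_datasets] == expected_abbrs
assert all(d['type'] is winograndeDataset_V2 for d in winogrande_datasets)

The evaluation config below imports these datasets and points the summarizer at them.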
from mmengine.config import read_base
from opencompass.models import HuggingFaceCausalLM

with read_base():
    from .datasets.winogrande.winogrande_gen_a027b6 import winogrande_datasets

datasets = [*winogrande_datasets]

_meta_template = dict(
    round=[
        dict(role='HUMAN', begin='<|User|>:', end='<eoh>\n'),
        dict(role='BOT', begin='<|Bot|>:', end='<eoa>\n', generate=True),
    ],
)

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='internlm-chat-7b-hf',
        path="internlm/internlm-chat-7b",
        tokenizer_path='internlm/internlm-chat-7b',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            use_fast=False,
            trust_remote_code=True,
        ),
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        meta_template=_meta_template,
        model_kwargs=dict(
            trust_remote_code=True,
            device_map='auto',
        ),
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]

# Group the three prompt variants: 'winogrande' reports their average and
# 'winogrande_std' (new `std` flag) reports their standard deviation.
_winogrande_all = [d['abbr'] for d in winogrande_datasets]

summarizer = dict(
    summary_groups=[
        {'name': 'winogrande', 'subsets': _winogrande_all},
        {'name': 'winogrande_std', 'subsets': _winogrande_all, 'std': True},
    ]
)
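The 'winogrande' group reports the plain average of the three prompt variants, while 'winogrande_std' (via the new `std` flag handled in the summarizer change below) reports their population standard deviation. A minimal sketch with made-up accuracies illustrates the two numbers:

import math

# Hypothetical per-prompt accuracies; real values come from the evaluation run.
scores = {'winogrande_prompt_1': 55.0,
          'winogrande_prompt_2': 57.5,
          'winogrande_prompt_3': 52.5}

avg = sum(scores.values()) / len(scores)                          # naive_average -> 55.0
var = sum((v - avg) ** 2 for v in scores.values()) / len(scores)  # population variance
std = math.sqrt(var)                                              # standard_deviation -> ~2.04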
# flake8: noqa
# yapf: disable
import getpass
+import math
import os.path as osp
from datetime import datetime
from typing import List, Optional
@@ -127,21 +128,28 @@ class DefaultSummarizer:
                        results[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][0]
                        eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
                if len(results) == len(sg['subsets']):
-                    if 'weights' in sg:
-                        numerator = sum(results[k] * sg['weights'][k] for k in sg['weights'])
-                        denominator = sum(sg['weights'].values())
-                        metric = 'weighted_average'
+                    if 'std' in sg and sg['std'] == True:
+                        avg = sum(results[k] for k in results) / len(results)
+                        variance = sum((results[k] - avg)**2 for k in results) / len(results)
+                        metric = 'standard_deviation'
+                        results[metric] = math.sqrt(variance)
                    else:
-                        numerator = sum(results[k] for k in results)
-                        denominator = len(results)
-                        metric = 'naive_average'
-                    results[metric] = numerator / denominator
+                        if 'weights' in sg:
+                            numerator = sum(results[k] * sg['weights'][k] for k in sg['weights'])
+                            denominator = sum(sg['weights'].values())
+                            metric = 'weighted_average'
+                        else:
+                            numerator = sum(results[k] for k in results)
+                            denominator = len(results)
+                            metric = 'naive_average'
+                        results[metric] = numerator / denominator
                    eval_modes = list(set(eval_modes))
                    eval_mode = eval_modes[0] if len(eval_modes) == 1 else 'mixed'
                    # add to global results
                    raw_results[model_abbr][sg['name']] = results
-                    parsed_results[model_abbr][sg['name']] = [numerator / denominator]
+                    parsed_results[model_abbr][sg['name']] = [results[metric]]
                    dataset_metrics[sg['name']] = [metric]
                    dataset_eval_mode[sg['name']] = eval_mode
                elif len(results) == 0:
...
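Read as a whole, the change makes `std` take precedence over weighted or naive averaging, and `parsed_results` now picks up whichever metric the branch stored. A standalone sketch of that control flow (a condensed illustration, not the library code) is:

import math
from typing import Dict, Optional, Tuple

def summarize_group(results: Dict[str, float],
                    weights: Optional[Dict[str, float]] = None,
                    std: bool = False) -> Tuple[str, float]:
    """Condensed version of the group-metric logic after this change."""
    if std:
        # 'std' wins: report the population standard deviation of the subsets.
        avg = sum(results.values()) / len(results)
        variance = sum((v - avg) ** 2 for v in results.values()) / len(results)
        return 'standard_deviation', math.sqrt(variance)
    if weights:
        # Weighted average, unchanged behaviour.
        numerator = sum(results[k] * weights[k] for k in weights)
        return 'weighted_average', numerator / sum(weights.values())
    # Plain average, unchanged behaviour.
    return 'naive_average', sum(results.values()) / len(results)

# Usage sketch with hypothetical accuracies:
print(summarize_group({'a': 55.0, 'b': 57.5, 'c': 52.5}))            # ('naive_average', 55.0)
print(summarize_group({'a': 55.0, 'b': 57.5, 'c': 52.5}, std=True))  # ('standard_deviation', ~2.04)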