Unverified Commit 7505b3ca authored by Fengzhe Zhou, committed by GitHub

[Feature] Add huggingface apply_chat_template (#1098)

* add TheoremQA with 5-shot

* add huggingface_above_v4_33 classes

* use num_worker partitioner in cli

* update theoremqa

* update TheoremQA

* add TheoremQA

* rename theoremqa -> TheoremQA

* update TheoremQA output path

* rewrite many model configs

* update huggingface

* further update

* refine configs

* update configs

* update configs

* add configs/eval_llama3_instruct.py

* add summarizer multi faceted

* update bbh datasets

* update configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py

* rename class

* update readme

* update hf above v4.33
parent 6c711cb2
 # flake8: noqa: F401, E501
 from .circular import CircularSummarizer  # noqa: F401
 from .default import DefaultSummarizer  # noqa: F401
-from .llm_compression import LLMCompressionSummarizer
+from .llm_compression import LLMCompressionSummarizer  # noqa: F401
+from .multi_faceted import MultiFacetedSummarizer  # noqa: F401
 from .subjective import *  # noqa: F401
@@ -226,12 +226,12 @@ class DefaultSummarizer:
         return raw_results, parsed_results, dataset_metrics, dataset_eval_mode
 
-    def _format_table(self, parsed_results, dataset_metrics, dataset_eval_mode):
+    def _format_table(self, parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=None):
         dataset_abbrs = [dataset_abbr_from_cfg(dataset) for dataset in self.dataset_cfgs]
         prompt_version = {dataset_abbr_from_cfg(d): get_prompt_hash(d)[:6] for d in self.dataset_cfgs}
 
         summarizer_dataset_abbrs = []
-        if self.dataset_abbrs is None:
+        if required_dataset_abbrs is None:
             # display all dataset metrics included in the config
             for dataset_abbr in dataset_abbrs:
                 if dataset_abbr in dataset_metrics:
@@ -246,7 +246,7 @@ class DefaultSummarizer:
                     summarizer_dataset_abbrs.append((dataset_abbr, metric))
         else:
             # follow the required order
-            for item in self.dataset_abbrs:
+            for item in required_dataset_abbrs:
                 if isinstance(item, str):
                     summarizer_dataset_abbrs.append((item, None))
                 elif isinstance(item, (list, tuple)):
@@ -306,7 +306,7 @@ class DefaultSummarizer:
         text = f'{time_str}\n' + \
                'tabulate format\n' + \
               '^' * 128 + '\n' + \
-              tabulate.tabulate(table, headers='firstrow') + '\n' + \
+              tabulate.tabulate(table, headers='firstrow', floatfmt='.2f') + '\n' + \
               '$' * 128 + '\n\n' + \
               '-' * 128 + ' THIS IS A DIVIDER ' + '-' * 128 + '\n\n' + \
               'csv format\n' + \
@@ -338,13 +338,13 @@ class DefaultSummarizer:
             self._calculate_group_metrics(raw_results, parsed_results, dataset_metrics, dataset_eval_mode)
         # format table
-        table = self._format_table(parsed_results, dataset_metrics, dataset_eval_mode)
+        table = self._format_table(parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=self.dataset_abbrs)
         # format raw txt
         raw_txts = self._format_raw_txt(raw_results)
         # output to screen
-        print(tabulate.tabulate(table, headers='firstrow'))
+        print(tabulate.tabulate(table, headers='firstrow', floatfmt='.2f'))
         # output to .text / .csv files
         self._output_to_file(output_path, time_str, table, raw_txts)
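`_format_table` now takes the required abbreviations as a parameter so subclasses can render several differently scoped tables from the same parsed results, and both tabulate calls pin floats to two decimals. A small standalone sketch of the `floatfmt` behaviour (the table contents are made up, not taken from the patch):

```python
import tabulate

# headers='firstrow' promotes the first row to column names;
# floatfmt='.2f' renders numeric cells with two decimals (31.2456 -> 31.25).
table = [
    ['dataset', 'version', 'metric', 'mode', 'demo-model'],
    ['TheoremQA', '6f0af8', 'score', 'gen', 31.2456],
]
print(tabulate.tabulate(table, headers='firstrow', floatfmt='.2f'))
```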
# flake8: noqa
# yapf: disable
import functools
import getpass
import math
import os
from datetime import datetime
from typing import Any, Dict, List, Optional

import tabulate
from mmengine import ConfigDict

from .default import DefaultSummarizer


class MultiFacetedSummarizer(DefaultSummarizer):

    def __init__(self, config: ConfigDict, dataset_abbrs_list: Optional[Dict[str, List[str]]] = None, summary_groups: List = []) -> None:
        super().__init__(config, dataset_abbrs=None, summary_groups=summary_groups)
        self.dataset_abbrs_list = dataset_abbrs_list

    def summarize(self, output_path: str = None, time_str: str = datetime.now().strftime('%Y%m%d_%H%M%S')):
        # pick up results
        raw_results, parsed_results, dataset_metrics, dataset_eval_mode = self._pick_up_results()

        # calculate group metrics
        raw_results, parsed_results, dataset_metrics, dataset_eval_mode = \
            self._calculate_group_metrics(raw_results, parsed_results, dataset_metrics, dataset_eval_mode)

        for dataset_abbrs_item in self.dataset_abbrs_list:
            profile_name = dataset_abbrs_item['name']
            profile_dataset_abbrs = dataset_abbrs_item['dataset_abbrs']

            # format table
            table = self._format_table(parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=profile_dataset_abbrs)

            # output to screen
            print(tabulate.tabulate(table, headers='firstrow', floatfmt='.2f'))

            # output to .text / .csv files
            output_csv_path = os.path.join(self.work_dir, 'summary', f'summary_{time_str}', f'{profile_name}.csv')
            os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
            with open(output_csv_path, 'w', encoding='utf-8') as f:
                f.write('\n'.join([','.join(row) for row in table]) + '\n')
            self.logger.info(f'write csv to {os.path.abspath(output_csv_path)}')
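A hypothetical summarizer config showing the shape `dataset_abbrs_list` expects (the profile names and dataset abbreviations below are placeholders, not part of the patch). Each entry produces one `<name>.csv` under `summary/summary_<time_str>/`, restricted to its own `dataset_abbrs`:

```python
# Placeholder example only; dataset abbreviations follow DefaultSummarizer's
# convention of plain strings or (dataset_abbr, metric) pairs.
summarizer = dict(
    type='opencompass.summarizers.MultiFacetedSummarizer',
    dataset_abbrs_list=[
        dict(name='overall', dataset_abbrs=['TheoremQA', 'gsm8k']),
        dict(name='theoremqa-only', dataset_abbrs=[('TheoremQA', 'score')]),
    ],
    summary_groups=[],
)
```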
@@ -22,5 +22,4 @@ def build_model_from_cfg(model_cfg: ConfigDict):
     model_cfg.pop('summarizer_abbr', None)
     model_cfg.pop('pred_postprocessor', None)
     model_cfg.pop('min_out_len', None)
-    model_cfg.pop('tokenizer_only', None)
     return MODELS.build(model_cfg)
@@ -5,8 +5,10 @@ import tabulate
 from mmengine.config import Config
 
 from opencompass.datasets.custom import make_custom_dataset_config
-from opencompass.models import VLLM, HuggingFaceCausalLM, TurboMindModel
-from opencompass.partitioners import NaivePartitioner, SizePartitioner
+from opencompass.models import (VLLM, HuggingFaceBaseModel,
+                                HuggingFaceCausalLM,
+                                HuggingFacewithChatTemplate, TurboMindModel)
+from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
 from opencompass.runners import DLCRunner, LocalRunner, SlurmRunner
 from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
 from opencompass.utils import get_logger, match_files
@@ -71,6 +73,7 @@ def get_config_from_arg(args) -> Config:
     2. args.models and args.datasets
     3. Huggingface parameter groups and args.datasets
     """
+    logger = get_logger()
     if args.config:
        config = Config.fromfile(args.config, format_python_code=False)
        config = try_fill_in_custom_cfgs(config)
@@ -140,19 +143,25 @@ def get_config_from_arg(args) -> Config:
                     f'Config file {model[1]} does not contain "models" field')
             models += cfg['models']
     else:
-        from opencompass.models import HuggingFace
-        model = dict(type=f'{HuggingFace.__module__}.{HuggingFace.__name__}',
+        if args.hf_type == 'chat':
+            mod = HuggingFacewithChatTemplate
+        else:
+            mod = HuggingFaceBaseModel
+        model = dict(type=f'{mod.__module__}.{mod.__name__}',
+                     abbr=args.hf_path.split('/')[-1] + '_hf',
                      path=args.hf_path,
-                     peft_path=args.peft_path,
-                     tokenizer_path=args.tokenizer_path,
                      model_kwargs=args.model_kwargs,
+                     tokenizer_path=args.tokenizer_path,
                      tokenizer_kwargs=args.tokenizer_kwargs,
+                     peft_path=args.peft_path,
+                     peft_kwargs=args.peft_kwargs,
                      max_seq_len=args.max_seq_len,
                      max_out_len=args.max_out_len,
-                     batch_padding=not args.no_batch_padding,
                      batch_size=args.batch_size,
                      pad_token_id=args.pad_token_id,
+                     stop_words=args.stop_words,
                      run_cfg=dict(num_gpus=args.num_gpus))
+        logger.debug(f'Using model: {model}')
         models.append(model)
     # set infer accelerator if needed
     if args.accelerator in ['vllm', 'lmdeploy']:
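For illustration only (the checkpoint name and numeric values are placeholders, not CLI defaults): with `args.hf_type == 'chat'`, the branch above resolves to `HuggingFacewithChatTemplate` and produces a model dict roughly like this:

```python
from opencompass.models import HuggingFacewithChatTemplate

# Hypothetical result of the CLI branch above; each field mirrors one argument.
model = dict(
    type=f'{HuggingFacewithChatTemplate.__module__}.{HuggingFacewithChatTemplate.__name__}',
    abbr='internlm2-chat-7b_hf',        # args.hf_path.split('/')[-1] + '_hf'
    path='internlm/internlm2-chat-7b',  # args.hf_path
    max_seq_len=2048,                   # args.max_seq_len
    max_out_len=256,                    # args.max_out_len
    batch_size=8,                       # args.batch_size
    stop_words=[],                      # args.stop_words
    run_cfg=dict(num_gpus=1),           # args.num_gpus
)
```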
@@ -173,7 +182,7 @@ def get_config_from_arg(args) -> Config:
             summarizer_file = summarizer_arg
 
         s = match_cfg_file(summarizers_dir, [summarizer_file])[0]
-        get_logger().info(f'Loading {s[0]}: {s[1]}')
+        logger.info(f'Loading {s[0]}: {s[1]}')
         cfg = Config.fromfile(s[1])
         # Use summarizer_key to retrieve the summarizer definition
         # from the configuration file
@@ -186,28 +195,23 @@ def change_accelerator(models, accelerator):
 
 def change_accelerator(models, accelerator):
     models = models.copy()
+    logger = get_logger()
     model_accels = []
     for model in models:
-        get_logger().info(f'Transforming {model["abbr"]} to {accelerator}')
+        logger.info(f'Transforming {model["abbr"]} to {accelerator}')
         # change HuggingFace model to VLLM or TurboMindModel
         if model['type'] is HuggingFaceCausalLM:
             gen_args = dict()
             if model.get('generation_kwargs') is not None:
                 generation_kwargs = model['generation_kwargs'].copy()
-                gen_args['temperature'] = 0.001 if generation_kwargs.get(
-                    'temperature'
-                ) is None else generation_kwargs['temperature']
-                gen_args['top_k'] = 1 if generation_kwargs.get(
-                    'top_k') is None else generation_kwargs['top_k']
-                gen_args['top_p'] = 0.9 if generation_kwargs.get(
-                    'top_p') is None else generation_kwargs['top_p']
-                gen_args['stop_token_ids'] = None if generation_kwargs.get(
-                    'eos_token_id'
-                ) is None else generation_kwargs['eos_token_id']
-                generation_kwargs[
-                    'stop_token_ids'] = None if generation_kwargs.get(
-                        'eos_token_id'
-                    ) is None else generation_kwargs['eos_token_id']
+                gen_args['temperature'] = generation_kwargs.get(
+                    'temperature', 0.001)
+                gen_args['top_k'] = generation_kwargs.get('top_k', 1)
+                gen_args['top_p'] = generation_kwargs.get('top_p', 0.9)
+                gen_args['stop_token_ids'] = generation_kwargs.get(
+                    'eos_token_id', None)
+                generation_kwargs['stop_token_ids'] = generation_kwargs.get(
+                    'eos_token_id', None)
                 generation_kwargs.pop('eos_token_id')
             else:
                 # if generation_kwargs is not provided, set default values
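One behavioural note on the `.get()` rewrite in the hunk above: `dict.get(key, default)` only falls back when the key is missing, whereas the old `x if d.get(k) is None else d[k]` form also replaced an explicit `None` value. A tiny self-contained check:

```python
# Demonstrates the difference between the two fallback styles used above.
generation_kwargs = {'temperature': None}

new_style = generation_kwargs.get('temperature', 0.001)
old_style = 0.001 if generation_kwargs.get('temperature') is None \
    else generation_kwargs['temperature']

print(new_style)  # None   (explicit None is kept)
print(old_style)  # 0.001  (explicit None is replaced)
print({}.get('temperature', 0.001))  # 0.001 (both styles agree when key is absent)
```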
@@ -218,11 +222,10 @@ def change_accelerator(models, accelerator):
                 gen_args['stop_token_ids'] = None
 
             if accelerator == 'lmdeploy':
-                get_logger().info(
-                    f'Transforming {model["abbr"]} to {accelerator}')
+                logger.info(f'Transforming {model["abbr"]} to {accelerator}')
+                mod = TurboMindModel
                 acc_model = dict(
-                    type=  # noqa E251
-                    f'{TurboMindModel.__module__}.{TurboMindModel.__name__}',
+                    type=f'{mod.__module__}.{mod.__name__}',
                     abbr=model['abbr'].replace('hf', 'lmdeploy')
                     if '-hf' in model['abbr'] else model['abbr'] + '-lmdeploy',
                     path=model['path'],
@@ -244,8 +247,7 @@ def change_accelerator(models, accelerator):
                 if model.get(item) is not None:
                     acc_model[item] = model[item]
             elif accelerator == 'vllm':
-                get_logger().info(
-                    f'Transforming {model["abbr"]} to {accelerator}')
+                logger.info(f'Transforming {model["abbr"]} to {accelerator}')
                 acc_model = dict(
                     type=f'{VLLM.__module__}.{VLLM.__name__}',
@@ -275,9 +277,8 @@ def get_config_type(obj) -> str:
 
 def fill_infer_cfg(cfg, args):
     new_cfg = dict(infer=dict(
-        partitioner=dict(type=get_config_type(SizePartitioner),
-                         max_task_size=args.max_partition_size,
-                         gen_task_coef=args.gen_task_coef),
+        partitioner=dict(type=get_config_type(NumWorkerPartitioner),
+                         num_worker=args.max_num_workers),
         runner=dict(
             max_num_workers=args.max_num_workers,
             debug=args.debug,
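`fill_infer_cfg` now partitions inference tasks by worker count (`args.max_num_workers`) instead of by task size (`args.max_partition_size` / `args.gen_task_coef`). For orientation, a hedged sketch of an equivalent hand-written `infer` section; the runner choice and the worker count are placeholders, not values taken from the patch:

```python
from opencompass.partitioners import NumWorkerPartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask

# Sketch only: mirrors the dict fill_infer_cfg builds, with a LocalRunner
# filled in where the real code keeps the runner configurable.
infer = dict(
    partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
    runner=dict(type=LocalRunner,
                max_num_workers=8,
                task=dict(type=OpenICLInferTask)),
)
```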
@@ -54,7 +54,7 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
     # extracted and generalized as a static method in these Inferencers
     # and reused here.
     if model_cfg:
-        max_seq_len = model_cfg.max_seq_len
+        max_seq_len = model_cfg.get('max_seq_len', 32768)
         if not model_cfg['type'].is_api:
             model_cfg['tokenizer_only'] = True
             model = build_model_from_cfg(model_cfg)