Unverified Commit 7505b3ca authored by Fengzhe Zhou, committed by GitHub

[Feature] Add huggingface apply_chat_template (#1098)

* add TheoremQA with 5-shot

* add huggingface_above_v4_33 classes

* use num_worker partitioner in cli

* update theoremqa

* update TheoremQA

* add TheoremQA

* rename theoremqa -> TheoremQA

* update TheoremQA output path

* rewrite many model configs

* update huggingface

* further update

* refine configs

* update configs

* update configs

* add configs/eval_llama3_instruct.py

* add summarizer multi faceted

* update bbh datasets

* update configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py

* rename class

* update readme

* update hf above v4.33
parent 6c711cb2
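
The per-file changes below all follow one pattern: base-model configs move from `HuggingFaceCausalLM` to `HuggingFaceBaseModel`, chat-model configs move to `HuggingFacewithChatTemplate`, and the hand-written `meta_template` prompt formats are dropped in favor of the chat template each checkpoint ships in its tokenizer config. For reference, the underlying transformers API (available above v4.33) works as below; the model chosen here is just an illustration.

```python
from transformers import AutoTokenizer

# Any chat model that ships a chat template in its tokenizer_config.json
# works the same way; internlm2 is used here purely as an example.
tokenizer = AutoTokenizer.from_pretrained('internlm/internlm2-chat-1_8b',
                                          trust_remote_code=True)

messages = [
    {'role': 'user', 'content': 'What is the capital of France?'},
]

# Render the conversation with the model's own template, appending the
# assistant prefix so generation starts at the right position.
prompt = tokenizer.apply_chat_template(messages,
                                       tokenize=False,
                                       add_generation_prompt=True)
print(prompt)
```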
deepseek-7b-base-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='deepseek-7b-base-hf',
-        path="deepseek-ai/deepseek-llm-7b-base",
-        tokenizer_path='deepseek-ai/deepseek-llm-7b-base',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        max_out_len=100,
-        max_seq_len=2048,
+        path='deepseek-ai/deepseek-llm-7b-base',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
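
Everything the old base-model config spelled out by hand (device placement, left padding, tokenizer flags, sequence budget) now lives in the model class defaults, so the config shrinks to identity, output budget, and scheduling. As a rough sketch of what the framework does with such a config under the hood, assuming stock transformers behavior:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('deepseek-ai/deepseek-llm-7b-base')
model = AutoModelForCausalLM.from_pretrained('deepseek-ai/deepseek-llm-7b-base',
                                             device_map='auto')

# max_out_len=1024 in the config corresponds to max_new_tokens here.
inputs = tokenizer('The capital of France is', return_tensors='pt').to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1024)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```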
deepseek-7b-chat-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    begin='<|begin▁of▁sentence|>',
-    round=[
-        dict(role="HUMAN", begin='User: ', end='\n\n'),
-        dict(role="BOT", begin="Assistant: ", end='<|end▁of▁sentence|>', generate=True),
-    ],
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='deepseek-7b-chat-hf',
-        path="deepseek-ai/deepseek-llm-7b-chat",
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        meta_template=_meta_template,
-        max_out_len=100,
-        max_seq_len=2048,
+        path='deepseek-ai/deepseek-llm-7b-chat',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-        batch_padding=True,
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
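
Under the old class, DeepSeek's `User:`/`Assistant:` framing had to be restated in every chat config through `_meta_template`. The new class only needs role-tagged turns: OpenCompass's HUMAN/BOT roles map onto the standard `user`/`assistant` message schema, roughly as sketched below (the helper is hypothetical, for illustration only; the real internals may differ).

```python
# Hypothetical helper illustrating the role mapping the new class performs.
ROLE_MAP = {'HUMAN': 'user', 'BOT': 'assistant', 'SYSTEM': 'system'}

def to_chat_messages(prompt_list):
    """Convert OpenCompass-style role dicts into chat-template messages."""
    return [{'role': ROLE_MAP[item['role']], 'content': item['prompt']}
            for item in prompt_list]

messages = to_chat_messages([
    {'role': 'HUMAN', 'prompt': 'Write a haiku about autumn.'},
])
# messages is now ready for tokenizer.apply_chat_template(...)
```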
deepseek-coder-1.3b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    round=[
-        dict(role="HUMAN", begin='### Instruction:\n', end='\n'),
-        dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True),
-    ],
-    eos_token_id=100001,
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='deepseek-coder-1.3b-hf',
-        path="deepseek-ai/deepseek-coder-1.3b-instruct",
-        tokenizer_path='deepseek-ai/deepseek-coder-1.3b-instruct',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        meta_template=_meta_template,
-        max_out_len=2048,
-        max_seq_len=2048,
+        path='deepseek-ai/deepseek-coder-1.3b-instruct',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-        end_str='<|EOT|>',
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
deepseek-coder-33b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    round=[
-        dict(role="HUMAN", begin='### Instruction:\n', end='\n'),
-        dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True),
-    ],
-    eos_token_id=100001,
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='deepseek-coder-33b-hf',
-        path="deepseek-ai/deepseek-coder-33b-instruct",
-        tokenizer_path='deepseek-ai/deepseek-coder-33b-instruct',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        meta_template=_meta_template,
-        max_out_len=2048,
-        max_seq_len=2048,
+        path='deepseek-ai/deepseek-coder-33b-instruct',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=4, num_procs=1),
-        end_str='<|EOT|>',
+        run_cfg=dict(num_gpus=2),
     )
 ]
```
deepseek-coder-6.7b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    round=[
-        dict(role="HUMAN", begin='### Instruction:\n', end='\n'),
-        dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True),
-    ],
-    eos_token_id=100001,
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='deepseek-coder-6.7b-hf',
-        path="deepseek-ai/deepseek-coder-6.7b-instruct",
-        tokenizer_path='deepseek-ai/deepseek-coder-6.7b-instruct',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        meta_template=_meta_template,
-        max_out_len=2048,
-        max_seq_len=2048,
+        path='deepseek-ai/deepseek-coder-6.7b-instruct',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-        end_str='<|EOT|>',
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
deepseek-moe-16b-base-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='deepseek-moe-16b-base-hf',
-        path="deepseek-ai/deepseek-moe-16b-base",
-        tokenizer_path='deepseek-ai/deepseek-moe-16b-base',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        min_out_len=3,
-        max_seq_len=2048,
+        path='deepseek-ai/deepseek-moe-16b-base',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=2, num_procs=1),
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
deepseek-moe-16b-chat-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    begin='<|begin▁of▁sentence|>',
-    round=[
-        dict(role="HUMAN", begin='User: ', end='\n\n'),
-        dict(role="BOT", begin="Assistant: ", end='<|end▁of▁sentence|>', generate=True),
-    ],
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='deepseek-moe-16b-chat-hf',
-        path="deepseek-ai/deepseek-moe-16b-chat",
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        meta_template=_meta_template,
-        max_out_len=100,
-        max_seq_len=2048,
+        path='deepseek-ai/deepseek-moe-16b-chat',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-        batch_padding=True,
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
falcon-40b-hf:

```diff
-# Only torch >=2.0 is supported for falcon-40b
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='falcon-40b-hf',
         path='tiiuae/falcon-40b',
-        tokenizer_path='tiiuae/falcon-40b',
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        max_seq_len=2048,
+        max_out_len=1024,
         batch_size=8,
-        model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='561820f7eef0cc56a31ea38af15ca1acb07fab5d'),
-        run_cfg=dict(num_gpus=4, num_procs=1),
+        run_cfg=dict(num_gpus=4),
     )
 ]
```
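
One detail the rewrite drops here: the old falcon configs pinned an exact model revision. If that snapshot matters for reproducibility, it can presumably be restored through `model_kwargs`, assuming the new class still forwards them to `from_pretrained` the way the old one did:

```python
from opencompass.models import HuggingFaceBaseModel

# Assumption: HuggingFaceBaseModel forwards model_kwargs to
# AutoModelForCausalLM.from_pretrained, as HuggingFaceCausalLM did.
models = [
    dict(
        type=HuggingFaceBaseModel,
        abbr='falcon-40b-hf',
        path='tiiuae/falcon-40b',
        model_kwargs=dict(revision='561820f7eef0cc56a31ea38af15ca1acb07fab5d'),
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=4),
    )
]
```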
falcon-7b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='falcon-7b-hf',
         path='tiiuae/falcon-7b',
-        tokenizer_path='tiiuae/falcon-7b',
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        max_seq_len=2048,
+        max_out_len=1024,
         batch_size=8,
-        model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='2f5c3cd4eace6be6c0f12981f377fb35e5bf6ee5'),
-        run_cfg=dict(num_gpus=1, num_procs=1),
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
gemma-2b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='gemma-2b-hf',
-        path="google/gemma-2b",
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        max_out_len=100,
-        max_seq_len=2048,
+        path='google/gemma-2b',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
gemma-2b-it-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    round=[
-        dict(role="HUMAN", begin='<start_of_turn>user\n', end='<end_of_turn>\n'),
-        dict(role="BOT", begin="<start_of_turn>model\n", end='<end_of_turn>\n', generate=True),
-    ],
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='gemma-2b-it-hf',
-        path="google/gemma-2b-it",
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        meta_template=_meta_template,
-        min_out_len=1,
-        max_out_len=100,
-        max_seq_len=2048,
+        path='google/gemma-2b-it',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-        batch_padding=True,
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
gemma-7b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='gemma-7b-hf',
-        path="google/gemma-7b",
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        max_out_len=100,
-        max_seq_len=2048,
+        path='google/gemma-7b',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
gemma-7b-it-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    round=[
-        dict(role="HUMAN", begin='<start_of_turn>user\n', end='<end_of_turn>\n'),
-        dict(role="BOT", begin="<start_of_turn>model\n", end='<end_of_turn>\n', generate=True),
-    ],
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='gemma-7b-it-hf',
-        path="google/gemma-7b-it",
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-            use_fast=False,
-        ),
-        meta_template=_meta_template,
-        min_out_len=1,
-        max_out_len=100,
-        max_seq_len=2048,
+        path='google/gemma-7b-it',
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-        batch_padding=True,
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
internlm2-1.8b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='internlm2-1.8b-hf',
         path="internlm/internlm2-1_8b",
-        tokenizer_path='internlm/internlm2-1_8b',
-        model_kwargs=dict(
-            trust_remote_code=True,
-            device_map='auto',
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            use_fast=False,
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        min_out_len=1,
-        max_seq_len=2048,
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
internlm2-20b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='internlm2-20b-hf',
         path="internlm/internlm2-20b",
-        tokenizer_path='internlm/internlm2-20b',
-        model_kwargs=dict(
-            trust_remote_code=True,
-            device_map='auto',
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            use_fast=False,
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        min_out_len=1,
-        max_seq_len=2048,
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=2, num_procs=1),
+        run_cfg=dict(num_gpus=2),
     )
 ]
```
internlm2-7b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='internlm2-7b-hf',
         path="internlm/internlm2-7b",
-        tokenizer_path='internlm/internlm2-7b',
-        model_kwargs=dict(
-            trust_remote_code=True,
-            device_map='auto',
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            use_fast=False,
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        min_out_len=1,
-        max_seq_len=2048,
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
internlm2-base-20b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='internlm2-base-20b-hf',
         path="internlm/internlm2-base-20b",
-        tokenizer_path='internlm/internlm2-base-20b',
-        model_kwargs=dict(
-            trust_remote_code=True,
-            device_map='auto',
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            use_fast=False,
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        min_out_len=1,
-        max_seq_len=2048,
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=2, num_procs=1),
+        run_cfg=dict(num_gpus=2),
     )
 ]
```
internlm2-base-7b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
+from opencompass.models import HuggingFaceBaseModel
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFaceBaseModel,
         abbr='internlm2-base-7b-hf',
         path="internlm/internlm2-base-7b",
-        tokenizer_path='internlm/internlm2-base-7b',
-        model_kwargs=dict(
-            trust_remote_code=True,
-            device_map='auto',
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            use_fast=False,
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        min_out_len=1,
-        max_seq_len=2048,
+        max_out_len=1024,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
+        run_cfg=dict(num_gpus=1),
     )
 ]
```
internlm2-chat-1.8b-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    round=[
-        dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'),
-        dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True),
-    ],
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='internlm2-chat-1.8b-hf',
-        path="internlm/internlm2-chat-1_8b",
-        tokenizer_path='internlm/internlm2-chat-1_8b',
-        model_kwargs=dict(
-            trust_remote_code=True,
-            device_map='auto',
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            use_fast=False,
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        max_seq_len=2048,
+        path='internlm/internlm2-chat-1_8b',
+        max_out_len=1024,
         batch_size=8,
-        meta_template=_meta_template,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-        end_str='<|im_end|>',
-        generation_kwargs = {"eos_token_id": [2, 92542]},
-        batch_padding=True,
+        run_cfg=dict(num_gpus=1),
+        stop_words=['</s>', '<|im_end|>'],
     )
 ]
```
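
The chat rewrite also collapses three overlapping stopping mechanisms (`end_str`, `generation_kwargs={'eos_token_id': [2, 92542]}`, and the template's end strings) into a single `stop_words` list. In plain transformers terms, string-based stopping can be sketched with a custom `StoppingCriteria` (the class below is ours, not OpenCompass's):

```python
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnStrings(StoppingCriteria):
    """Stop generation once any stop string appears in the decoded tail."""

    def __init__(self, stops, tokenizer):
        self.stops = stops
        self.tokenizer = tokenizer

    def __call__(self, input_ids, scores, **kwargs):
        # Decoding only the last few tokens is enough to catch a stop string.
        tail = self.tokenizer.decode(input_ids[0, -20:])
        return any(stop in tail for stop in self.stops)

# Usage (tokenizer, model, inputs assumed already set up):
# criteria = StoppingCriteriaList([StopOnStrings(['</s>', '<|im_end|>'], tokenizer)])
# outputs = model.generate(**inputs, stopping_criteria=criteria, max_new_tokens=1024)
```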
internlm2-chat-1.8b-sft-hf:

```diff
-from opencompass.models import HuggingFaceCausalLM
-_meta_template = dict(
-    round=[
-        dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'),
-        dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True),
-    ],
-)
+from opencompass.models import HuggingFacewithChatTemplate
 models = [
     dict(
-        type=HuggingFaceCausalLM,
+        type=HuggingFacewithChatTemplate,
         abbr='internlm2-chat-1.8b-sft-hf',
-        path="internlm/internlm2-chat-1_8b-sft",
-        tokenizer_path='internlm/internlm2-chat-1_8b-sft',
-        model_kwargs=dict(
-            trust_remote_code=True,
-            device_map='auto',
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            use_fast=False,
-            trust_remote_code=True,
-        ),
-        max_out_len=100,
-        max_seq_len=2048,
+        path='internlm/internlm2-chat-1_8b-sft',
+        max_out_len=1024,
         batch_size=8,
-        meta_template=_meta_template,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-        end_str='<|im_end|>',
-        generation_kwargs = {"eos_token_id": [2, 92542]},
-        batch_padding=True,
+        run_cfg=dict(num_gpus=1),
+        stop_words=['</s>', '<|im_end|>'],
    )
 ]
```