Unverified Commit 8c85edd1 authored by Fengzhe Zhou's avatar Fengzhe Zhou Committed by GitHub
Browse files

[Sync] deprecate old mbpps (#1064)

parent c1724013
...@@ -2,8 +2,8 @@ from opencompass.models import HuggingFaceCausalLM ...@@ -2,8 +2,8 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template = dict( _meta_template = dict(
round=[ round=[
dict(role="HUMAN", begin=' [INST] ', end=' [/INST] '), dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin='', end='', generate=True), dict(role="BOT", begin=' ', end=' ', generate=True),
], ],
) )
...@@ -27,5 +27,6 @@ models = [ ...@@ -27,5 +27,6 @@ models = [
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=1, num_procs=1),
end_str='[INST]', end_str='[INST]',
batch_padding=True,
) )
] ]
...@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM ...@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template = dict( _meta_template = dict(
begin="<s>", begin="<s>",
round=[ round=[
dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin="", end='</s>', generate=True), dict(role="BOT", begin="", end='</s> ', generate=True),
], ],
eos_token_id=2
) )
models = [ models = [
...@@ -30,5 +29,6 @@ models = [ ...@@ -30,5 +29,6 @@ models = [
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=1, num_procs=1),
batch_padding=True,
) )
] ]
...@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM ...@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template = dict( _meta_template = dict(
begin="<s>", begin="<s>",
round=[ round=[
dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin="", end='</s>', generate=True), dict(role="BOT", begin="", end='</s> ', generate=True),
], ],
eos_token_id=2
) )
models = [ models = [
...@@ -30,6 +29,6 @@ models = [ ...@@ -30,6 +29,6 @@ models = [
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=1, num_procs=1),
end_str='</s>', batch_padding=True,
) )
] ]
...@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM ...@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template = dict( _meta_template = dict(
begin="<s>", begin="<s>",
round=[ round=[
dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin="", end='</s>', generate=True), dict(role="BOT", begin="", end='</s> ', generate=True),
], ],
eos_token_id=2
) )
models = [ models = [
...@@ -30,6 +29,6 @@ models = [ ...@@ -30,6 +29,6 @@ models = [
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=2, num_procs=1), run_cfg=dict(num_gpus=2, num_procs=1),
end_str='</s>', batch_padding=True,
) )
] ]
...@@ -12,7 +12,6 @@ models = [ ...@@ -12,7 +12,6 @@ models = [
type=HuggingFace, type=HuggingFace,
abbr='minicpm-2b-dpo-hf', abbr='minicpm-2b-dpo-hf',
path='openbmb/MiniCPM-2B-dpo-fp32', path='openbmb/MiniCPM-2B-dpo-fp32',
tokenizer_path='openbmb/MiniCPM-2B-dpo-fp32',
model_kwargs=dict( model_kwargs=dict(
trust_remote_code=True, trust_remote_code=True,
device_map='auto', device_map='auto',
...@@ -27,6 +26,6 @@ models = [ ...@@ -27,6 +26,6 @@ models = [
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=1, num_procs=1),
end_str='<用户>', batch_padding=True,
) )
] ]
...@@ -12,7 +12,6 @@ models = [ ...@@ -12,7 +12,6 @@ models = [
type=HuggingFace, type=HuggingFace,
abbr='minicpm-2b-sft-hf', abbr='minicpm-2b-sft-hf',
path='openbmb/MiniCPM-2B-sft-fp32', path='openbmb/MiniCPM-2B-sft-fp32',
tokenizer_path='openbmb/MiniCPM-2B-sft-fp32',
model_kwargs=dict( model_kwargs=dict(
trust_remote_code=True, trust_remote_code=True,
device_map='auto', device_map='auto',
...@@ -27,6 +26,6 @@ models = [ ...@@ -27,6 +26,6 @@ models = [
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=1, num_procs=1),
end_str='<用户>', batch_padding=True,
) )
] ]
from opencompass.models import HuggingFaceCausalLM

# Prompt wrapper using Cohere's special turn tokens; the BOT turn is the
# one the model is asked to generate.
_meta_template = dict(
    round=[
        dict(
            role='HUMAN',
            begin='<|START_OF_TURN_TOKEN|><|USER_TOKEN|>',
            end='<|END_OF_TURN_TOKEN|>',
        ),
        dict(
            role='BOT',
            begin='<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>',
            end='<|END_OF_TURN_TOKEN|>',
            generate=True,
        ),
    ],
)

# Command R+ (104B) evaluated via HuggingFace causal LM; needs 8 GPUs.
models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='command-r-plus-hf',
        path='CohereForAI/c4ai-command-r-plus',
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        meta_template=_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=8, num_procs=1),
        # Stop generation at the end-of-turn token.
        end_str='<|END_OF_TURN_TOKEN|>',
        batch_padding=True,
    )
]
...@@ -29,7 +29,6 @@ models = [ ...@@ -29,7 +29,6 @@ models = [
batch_size=8, batch_size=8,
meta_template=_meta_template, meta_template=_meta_template,
run_cfg=dict(num_gpus=8, num_procs=1), run_cfg=dict(num_gpus=8, num_procs=1),
end_str='<|im_end|>',
batch_padding=True, batch_padding=True,
) )
] ]
...@@ -5,7 +5,6 @@ _meta_template = dict( ...@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
], ],
eos_token_id=151645,
) )
models = [ models = [
...@@ -24,11 +23,11 @@ models = [ ...@@ -24,11 +23,11 @@ models = [
use_fast=False, use_fast=False,
), ),
meta_template=_meta_template, meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100, max_out_len=100,
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>', end_str='<|im_end|>',
batch_padding=True,
) )
] ]
...@@ -5,7 +5,6 @@ _meta_template = dict( ...@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
], ],
eos_token_id=151645,
) )
models = [ models = [
...@@ -24,11 +23,11 @@ models = [ ...@@ -24,11 +23,11 @@ models = [
use_fast=False, use_fast=False,
), ),
meta_template=_meta_template, meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100, max_out_len=100,
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>', end_str='<|im_end|>',
batch_padding=True,
) )
] ]
...@@ -5,7 +5,6 @@ _meta_template = dict( ...@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
], ],
eos_token_id=151645,
) )
models = [ models = [
...@@ -24,11 +23,11 @@ models = [ ...@@ -24,11 +23,11 @@ models = [
use_fast=False, use_fast=False,
), ),
meta_template=_meta_template, meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100, max_out_len=100,
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>', end_str='<|im_end|>',
batch_padding=True,
) )
] ]
# Config for evaluating the Qwen1.5-32B base model through the
# HuggingFace causal-LM wrapper.
from opencompass.models import HuggingFaceCausalLM

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='qwen1.5-32b-hf',
        path="Qwen/Qwen1.5-32B",
        tokenizer_path='Qwen/Qwen1.5-32B',
        model_kwargs=dict(
            device_map='auto',
            trust_remote_code=True
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,
        ),
        # NOTE(review): 151645 is Qwen's <|im_end|> token. The sibling
        # qwen1.5 chat configs in this same change drop their explicit
        # pad_token_id/eos_token_id=151645 settings; keeping it here on the
        # base model may be an oversight — confirm against the other
        # qwen1.5 base-model configs before relying on it.
        pad_token_id=151645,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=2, num_procs=1),
    )
]
from opencompass.models import HuggingFaceCausalLM

# ChatML-style prompt template used by the Qwen1.5 chat models; generation
# happens in the assistant ("BOT") turn.
_meta_template = dict(
    round=[
        dict(
            role='HUMAN',
            begin='<|im_start|>user\n',
            end='<|im_end|>\n',
        ),
        dict(
            role='BOT',
            begin='<|im_start|>assistant\n',
            end='<|im_end|>\n',
            generate=True,
        ),
    ],
)

# Qwen1.5-32B-Chat via the HuggingFace causal LM wrapper; 2 GPUs.
models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='qwen1.5-32b-chat-hf',
        path='Qwen/Qwen1.5-32B-Chat',
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,
        ),
        meta_template=_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=2, num_procs=1),
        # Truncate model output at the ChatML end-of-message marker.
        end_str='<|im_end|>',
        batch_padding=True,
    )
]
...@@ -5,7 +5,6 @@ _meta_template = dict( ...@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
], ],
eos_token_id=151645,
) )
models = [ models = [
...@@ -24,11 +23,11 @@ models = [ ...@@ -24,11 +23,11 @@ models = [
use_fast=False, use_fast=False,
), ),
meta_template=_meta_template, meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100, max_out_len=100,
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>', end_str='<|im_end|>',
batch_padding=True,
) )
] ]
...@@ -5,7 +5,6 @@ _meta_template = dict( ...@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
], ],
eos_token_id=151645,
) )
models = [ models = [
...@@ -24,11 +23,11 @@ models = [ ...@@ -24,11 +23,11 @@ models = [
use_fast=False, use_fast=False,
), ),
meta_template=_meta_template, meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100, max_out_len=100,
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=4, num_procs=1), run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>', end_str='<|im_end|>',
batch_padding=True,
) )
] ]
...@@ -5,7 +5,6 @@ _meta_template = dict( ...@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
], ],
eos_token_id=151645,
) )
models = [ models = [
...@@ -24,11 +23,11 @@ models = [ ...@@ -24,11 +23,11 @@ models = [
use_fast=False, use_fast=False,
), ),
meta_template=_meta_template, meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100, max_out_len=100,
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>', end_str='<|im_end|>',
batch_padding=True,
) )
] ]
...@@ -12,7 +12,6 @@ models = [ ...@@ -12,7 +12,6 @@ models = [
type=HuggingFace, type=HuggingFace,
abbr='yi-34b-chat-hf', abbr='yi-34b-chat-hf',
path='01-ai/Yi-34B-Chat', path='01-ai/Yi-34B-Chat',
tokenizer_path='01-ai/Yi-34B-Chat',
model_kwargs=dict( model_kwargs=dict(
trust_remote_code=True, trust_remote_code=True,
device_map='auto', device_map='auto',
...@@ -26,7 +25,8 @@ models = [ ...@@ -26,7 +25,8 @@ models = [
max_out_len=100, max_out_len=100,
max_seq_len=2048, max_seq_len=2048,
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=4, num_procs=1), run_cfg=dict(num_gpus=2, num_procs=1),
end_str='<|im_end|>', end_str='<|im_end|>',
batch_padding=True,
) )
] ]
...@@ -28,5 +28,6 @@ models = [ ...@@ -28,5 +28,6 @@ models = [
batch_size=8, batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1), run_cfg=dict(num_gpus=1, num_procs=1),
end_str='<|im_end|>', end_str='<|im_end|>',
batch_padding=True,
) )
] ]
...@@ -17,7 +17,7 @@ def create_m_rs_names_list(context_lengths, depths, needle_counts, ...@@ -17,7 +17,7 @@ def create_m_rs_names_list(context_lengths, depths, needle_counts,
for depth in depths for depth in depths
] ]
names_dict[key] = names_list names_dict[key] = names_list
multi_needle_list.extend(names_list) multi_needle_list.extend(names_list)
if language == 'en': if language == 'en':
multi_needle_en_list.extend(names_list) multi_needle_en_list.extend(names_list)
...@@ -29,7 +29,7 @@ def create_m_rs_names_list(context_lengths, depths, needle_counts, ...@@ -29,7 +29,7 @@ def create_m_rs_names_list(context_lengths, depths, needle_counts,
return names_dict return names_dict
def create_summarizer(context_lengths, depths, dataset_size, def create_summarizer(context_lengths, depths, dataset_size,
sparse_depths=None): sparse_depths=None):
needle_counts = ["2", "3", "4", "5"] needle_counts = ["2", "3", "4", "5"]
languages = ["en", "zh"] languages = ["en", "zh"]
...@@ -40,7 +40,7 @@ def create_summarizer(context_lengths, depths, dataset_size, ...@@ -40,7 +40,7 @@ def create_summarizer(context_lengths, depths, dataset_size,
context_lengths, depths, needle_counts, languages, dataset_size) context_lengths, depths, needle_counts, languages, dataset_size)
names_dict.update(multi_reasoning_names) names_dict.update(multi_reasoning_names)
single_needle_list = [] single_needle_list = []
single_needle_en_list = [] single_needle_en_list = []
single_needle_zh_list = [] single_needle_zh_list = []
...@@ -133,6 +133,8 @@ context_lengths_128k = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 1 ...@@ -133,6 +133,8 @@ context_lengths_128k = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 1
needlebench_128k_summarizer = create_summarizer(context_lengths_128k, depths_list_sparse, "128k") needlebench_128k_summarizer = create_summarizer(context_lengths_128k, depths_list_sparse, "128k")
context_lengths_200k = list([16000, 48000, 80000, 112000, 128000, 144000, 176000, 200000]) context_lengths_200k = list([16000, 48000, 80000, 112000, 128000, 144000, 176000, 200000])
needlebench_200k_summarizer = create_summarizer(context_lengths_200k, depths_list_sparse, "200k") needlebench_200k_summarizer = create_summarizer(context_lengths_200k, depths_list_sparse, "200k")
context_lengths_256k = list([32000, 128000, 256000])
needlebench_256k_summarizer = create_summarizer(context_lengths_256k, depths_list_sparse, "256k")
context_lengths_1000k = list([20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000]) context_lengths_1000k = list([20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000])
needlebench_1000k_summarizer = create_summarizer(context_lengths_1000k, depths_list_sparse, "1000k") needlebench_1000k_summarizer = create_summarizer(context_lengths_1000k, depths_list_sparse, "1000k")
......
...@@ -4,7 +4,7 @@ This tutorial primarily focuses on evaluating a model's coding proficiency, usin ...@@ -4,7 +4,7 @@ This tutorial primarily focuses on evaluating a model's coding proficiency, usin
## pass@1 ## pass@1
If you only need to generate a single response to evaluate the pass@1 performance, you can directly use [configs/datasets/humaneval/humaneval_gen_8e312c.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/humaneval/humaneval_gen_8e312c.py) and [configs/datasets/mbpp/mbpp_gen_1e1056.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/mbpp/mbpp_gen_1e1056.py), referring to the general [quick start tutorial](../get_started/quick_start.md). If you only need to generate a single response to evaluate the pass@1 performance, you can directly use [configs/datasets/humaneval/humaneval_gen_8e312c.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/humaneval/humaneval_gen_8e312c.py) and [configs/datasets/mbpp/deprecated_mbpp_gen_1e1056.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/mbpp/deprecated_mbpp_gen_1e1056.py), referring to the general [quick start tutorial](../get_started/quick_start.md).
For multilingual evaluation, please refer to the [Multilingual Code Evaluation Tutorial](./code_eval_service.md). For multilingual evaluation, please refer to the [Multilingual Code Evaluation Tutorial](./code_eval_service.md).
...@@ -21,7 +21,7 @@ from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator ...@@ -21,7 +21,7 @@ from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator
with read_base(): with read_base():
from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
from .datasets.mbpp.mbpp_gen_1e1056 import mbpp_datasets from .datasets.mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
mbpp_datasets[0]['type'] = MBPPDataset_V2 mbpp_datasets[0]['type'] = MBPPDataset_V2
mbpp_datasets[0]['eval_cfg']['evaluator']['type'] = MBPPPassKEvaluator mbpp_datasets[0]['eval_cfg']['evaluator']['type'] = MBPPPassKEvaluator
...@@ -63,7 +63,7 @@ from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator ...@@ -63,7 +63,7 @@ from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator
with read_base(): with read_base():
from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
from .datasets.mbpp.mbpp_gen_1e1056 import mbpp_datasets from .datasets.mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
humaneval_datasets[0]['abbr'] = 'openai_humaneval_pass10' humaneval_datasets[0]['abbr'] = 'openai_humaneval_pass10'
humaneval_datasets[0]['num_repeats'] = 10 humaneval_datasets[0]['num_repeats'] = 10
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment