Unverified Commit 8c85edd1 authored by Fengzhe Zhou's avatar Fengzhe Zhou Committed by GitHub
Browse files

[Sync] deprecate old mbpps (#1064)

parent c1724013
......@@ -2,8 +2,8 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role="HUMAN", begin=' [INST] ', end=' [/INST] '),
dict(role="BOT", begin='', end='', generate=True),
dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin=' ', end=' ', generate=True),
],
)
......@@ -27,5 +27,6 @@ models = [
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
end_str='[INST]',
batch_padding=True,
)
]
......@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
begin="<s>",
round=[
dict(role="HUMAN", begin='[INST]', end='[/INST]'),
dict(role="BOT", begin="", end='</s>', generate=True),
dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin="", end='</s> ', generate=True),
],
eos_token_id=2
)
models = [
......@@ -30,5 +29,6 @@ models = [
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
batch_padding=True,
)
]
......@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
begin="<s>",
round=[
dict(role="HUMAN", begin='[INST]', end='[/INST]'),
dict(role="BOT", begin="", end='</s>', generate=True),
dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin="", end='</s> ', generate=True),
],
eos_token_id=2
)
models = [
......@@ -30,6 +29,6 @@ models = [
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
end_str='</s>',
batch_padding=True,
)
]
......@@ -4,10 +4,9 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
begin="<s>",
round=[
dict(role="HUMAN", begin='[INST]', end='[/INST]'),
dict(role="BOT", begin="", end='</s>', generate=True),
dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin="", end='</s> ', generate=True),
],
eos_token_id=2
)
models = [
......@@ -30,6 +29,6 @@ models = [
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=2, num_procs=1),
end_str='</s>',
batch_padding=True,
)
]
......@@ -12,7 +12,6 @@ models = [
type=HuggingFace,
abbr='minicpm-2b-dpo-hf',
path='openbmb/MiniCPM-2B-dpo-fp32',
tokenizer_path='openbmb/MiniCPM-2B-dpo-fp32',
model_kwargs=dict(
trust_remote_code=True,
device_map='auto',
......@@ -27,6 +26,6 @@ models = [
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
end_str='<用户>',
batch_padding=True,
)
]
......@@ -12,7 +12,6 @@ models = [
type=HuggingFace,
abbr='minicpm-2b-sft-hf',
path='openbmb/MiniCPM-2B-sft-fp32',
tokenizer_path='openbmb/MiniCPM-2B-sft-fp32',
model_kwargs=dict(
trust_remote_code=True,
device_map='auto',
......@@ -27,6 +26,6 @@ models = [
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
end_str='<用户>',
batch_padding=True,
)
]
from opencompass.models import HuggingFaceCausalLM

# Chat template for Cohere Command-R: every turn is wrapped in the model's
# special turn tokens; the BOT round is the one the model generates.
_meta_template = dict(
    round=[
        dict(role="HUMAN",
             begin='<|START_OF_TURN_TOKEN|><|USER_TOKEN|>',
             end='<|END_OF_TURN_TOKEN|>'),
        dict(role="BOT",
             begin="<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
             end='<|END_OF_TURN_TOKEN|>',
             generate=True),
    ],
)

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='command-r-plus-hf',
        path="CohereForAI/c4ai-command-r-plus",
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        # Left-side padding/truncation so generation starts right after the prompt.
        tokenizer_kwargs=dict(padding_side='left',
                              truncation_side='left',
                              trust_remote_code=True),
        meta_template=_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=8, num_procs=1),
        # Stop decoding once the model emits its end-of-turn token.
        end_str='<|END_OF_TURN_TOKEN|>',
        batch_padding=True,
    )
]
......@@ -29,7 +29,6 @@ models = [
batch_size=8,
meta_template=_meta_template,
run_cfg=dict(num_gpus=8, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
......@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
],
eos_token_id=151645,
)
models = [
......@@ -24,11 +23,11 @@ models = [
use_fast=False,
),
meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
......@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
],
eos_token_id=151645,
)
models = [
......@@ -24,11 +23,11 @@ models = [
use_fast=False,
),
meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
......@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
],
eos_token_id=151645,
)
models = [
......@@ -24,11 +23,11 @@ models = [
use_fast=False,
),
meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
from opencompass.models import HuggingFaceCausalLM

# Base (non-chat) Qwen1.5-32B: no meta template, raw completion model.
models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='qwen1.5-32b-hf',
        path="Qwen/Qwen1.5-32B",
        tokenizer_path='Qwen/Qwen1.5-32B',
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        # Left-side padding/truncation so generation starts right after the prompt.
        tokenizer_kwargs=dict(padding_side='left',
                              truncation_side='left',
                              trust_remote_code=True,
                              use_fast=False),
        # 151645 is Qwen's <|im_end|> token id, used here for padding.
        pad_token_id=151645,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=2, num_procs=1),
    )
]
from opencompass.models import HuggingFaceCausalLM

# ChatML-style template used by Qwen1.5 chat models; the BOT round is the
# one the model generates.
_meta_template = dict(
    round=[
        dict(role="HUMAN",
             begin='<|im_start|>user\n',
             end='<|im_end|>\n'),
        dict(role="BOT",
             begin="<|im_start|>assistant\n",
             end='<|im_end|>\n',
             generate=True),
    ],
)

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='qwen1.5-32b-chat-hf',
        path="Qwen/Qwen1.5-32B-Chat",
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        # Left-side padding/truncation so generation starts right after the prompt.
        tokenizer_kwargs=dict(padding_side='left',
                              truncation_side='left',
                              trust_remote_code=True,
                              use_fast=False),
        meta_template=_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=2, num_procs=1),
        # Stop decoding at the ChatML end-of-message marker.
        end_str='<|im_end|>',
        batch_padding=True,
    )
]
......@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
],
eos_token_id=151645,
)
models = [
......@@ -24,11 +23,11 @@ models = [
use_fast=False,
),
meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
......@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
],
eos_token_id=151645,
)
models = [
......@@ -24,11 +23,11 @@ models = [
use_fast=False,
),
meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
......@@ -5,7 +5,6 @@ _meta_template = dict(
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
],
eos_token_id=151645,
)
models = [
......@@ -24,11 +23,11 @@ models = [
use_fast=False,
),
meta_template=_meta_template,
pad_token_id=151645,
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
......@@ -12,7 +12,6 @@ models = [
type=HuggingFace,
abbr='yi-34b-chat-hf',
path='01-ai/Yi-34B-Chat',
tokenizer_path='01-ai/Yi-34B-Chat',
model_kwargs=dict(
trust_remote_code=True,
device_map='auto',
......@@ -26,7 +25,8 @@ models = [
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=4, num_procs=1),
run_cfg=dict(num_gpus=2, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
......@@ -28,5 +28,6 @@ models = [
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
end_str='<|im_end|>',
batch_padding=True,
)
]
......@@ -17,7 +17,7 @@ def create_m_rs_names_list(context_lengths, depths, needle_counts,
for depth in depths
]
names_dict[key] = names_list
multi_needle_list.extend(names_list)
if language == 'en':
multi_needle_en_list.extend(names_list)
......@@ -29,7 +29,7 @@ def create_m_rs_names_list(context_lengths, depths, needle_counts,
return names_dict
def create_summarizer(context_lengths, depths, dataset_size,
def create_summarizer(context_lengths, depths, dataset_size,
sparse_depths=None):
needle_counts = ["2", "3", "4", "5"]
languages = ["en", "zh"]
......@@ -40,7 +40,7 @@ def create_summarizer(context_lengths, depths, dataset_size,
context_lengths, depths, needle_counts, languages, dataset_size)
names_dict.update(multi_reasoning_names)
single_needle_list = []
single_needle_en_list = []
single_needle_zh_list = []
......@@ -133,6 +133,8 @@ context_lengths_128k = list([16000, 32000, 48000, 64000, 80000, 96000, 112000, 1
needlebench_128k_summarizer = create_summarizer(context_lengths_128k, depths_list_sparse, "128k")
context_lengths_200k = list([16000, 48000, 80000, 112000, 128000, 144000, 176000, 200000])
needlebench_200k_summarizer = create_summarizer(context_lengths_200k, depths_list_sparse, "200k")
context_lengths_256k = list([32000, 128000, 256000])
needlebench_256k_summarizer = create_summarizer(context_lengths_256k, depths_list_sparse, "256k")
context_lengths_1000k = list([20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000])
needlebench_1000k_summarizer = create_summarizer(context_lengths_1000k, depths_list_sparse, "1000k")
......
......@@ -4,7 +4,7 @@ This tutorial primarily focuses on evaluating a model's coding proficiency, usin
## pass@1
If you only need to generate a single response to evaluate the pass@1 performance, you can directly use [configs/datasets/humaneval/humaneval_gen_8e312c.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/humaneval/humaneval_gen_8e312c.py) and [configs/datasets/mbpp/mbpp_gen_1e1056.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/mbpp/mbpp_gen_1e1056.py), referring to the general [quick start tutorial](../get_started/quick_start.md).
If you only need to generate a single response to evaluate the pass@1 performance, you can directly use [configs/datasets/humaneval/humaneval_gen_8e312c.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/humaneval/humaneval_gen_8e312c.py) and [configs/datasets/mbpp/deprecated_mbpp_gen_1e1056.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/mbpp/deprecated_mbpp_gen_1e1056.py), referring to the general [quick start tutorial](../get_started/quick_start.md).
For multilingual evaluation, please refer to the [Multilingual Code Evaluation Tutorial](./code_eval_service.md).
......@@ -21,7 +21,7 @@ from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator
with read_base():
from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
from .datasets.mbpp.mbpp_gen_1e1056 import mbpp_datasets
from .datasets.mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
mbpp_datasets[0]['type'] = MBPPDataset_V2
mbpp_datasets[0]['eval_cfg']['evaluator']['type'] = MBPPPassKEvaluator
......@@ -63,7 +63,7 @@ from opencompass.datasets import MBPPDataset_V2, MBPPPassKEvaluator
with read_base():
from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
from .datasets.mbpp.mbpp_gen_1e1056 import mbpp_datasets
from .datasets.mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
humaneval_datasets[0]['abbr'] = 'openai_humaneval_pass10'
humaneval_datasets[0]['num_repeats'] = 10
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment