Unverified Commit 32f40a8f authored by Fengzhe Zhou's avatar Fengzhe Zhou Committed by GitHub
Browse files

[Sync] Sync with internal codes 2023.01.08 (#777)

parent 8194199d
......@@ -16,6 +16,6 @@ models = [
batch_size=8,
model_kwargs=dict(device_map='auto'),
batch_padding=False, # if false, inference with for-loop without batch padding
run_cfg=dict(num_gpus=8, num_procs=1),
run_cfg=dict(num_gpus=4, num_procs=1),
)
]
from opencompass.models import HuggingFaceCausalLM

# Llama-2 chat turn format: each user message is wrapped in
# " [INST] ... [/INST] "; the assistant reply follows immediately and is
# the generated part of the round.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': ' [INST] ', 'end': ' [/INST] '},
        {'role': 'BOT', 'begin': '', 'end': '', 'generate': True},
    ],
}

# 70B chat checkpoint loaded through HuggingFace transformers; the runner
# is asked for 4 GPUs and `device_map='auto'` places the shards.
models = [
    {
        'type': HuggingFaceCausalLM,
        'abbr': 'llama-2-70b-chat-hf',
        'path': 'meta-llama/Llama-2-70b-chat-hf',
        'tokenizer_path': 'meta-llama/Llama-2-70b-chat-hf',
        'model_kwargs': {'device_map': 'auto'},
        'tokenizer_kwargs': {
            'padding_side': 'left',
            'truncation_side': 'left',
            'use_fast': False,
        },
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 4, 'num_procs': 1},
        # Cut the completion at the first follow-up turn marker.
        'end_str': '[INST]',
    },
]
from opencompass.models import HuggingFaceCausalLM

# Llama-2 chat turn format: each user message is wrapped in
# " [INST] ... [/INST] "; the assistant reply follows immediately and is
# the generated part of the round.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': ' [INST] ', 'end': ' [/INST] '},
        {'role': 'BOT', 'begin': '', 'end': '', 'generate': True},
    ],
}

# 7B chat checkpoint loaded through HuggingFace transformers on a
# single GPU (`device_map='auto'` handles placement).
models = [
    {
        'type': HuggingFaceCausalLM,
        'abbr': 'llama-2-7b-chat-hf',
        'path': 'meta-llama/Llama-2-7b-chat-hf',
        'tokenizer_path': 'meta-llama/Llama-2-7b-chat-hf',
        'model_kwargs': {'device_map': 'auto'},
        'tokenizer_kwargs': {
            'padding_side': 'left',
            'truncation_side': 'left',
            'use_fast': False,
        },
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
        # Cut the completion at the first follow-up turn marker.
        'end_str': '[INST]',
    },
]
......@@ -16,6 +16,6 @@ models = [
batch_size=8,
model_kwargs=dict(device_map='auto'),
batch_padding=False, # if false, inference with for-loop without batch padding
run_cfg=dict(num_gpus=8, num_procs=1),
run_cfg=dict(num_gpus=4, num_procs=1),
)
]
from opencompass.models import HuggingFaceCausalLM

# ChatML-style turn markers used by the Lemur chat models:
# <|im_start|>user / <|im_start|>assistant, each turn closed by <|im_end|>.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': '\n<|im_start|>user\n', 'end': '<|im_end|>'},
        {'role': 'BOT', 'begin': '\n<|im_start|>assistant\n', 'end': '<|im_end|>',
         'generate': True},
    ],
}

# Lemur 70B chat served through HuggingFace transformers; the remote
# modeling code is trusted and the runner is asked for 4 GPUs.
models = [
    {
        'type': HuggingFaceCausalLM,
        'abbr': 'lemur-70b-chat-v1',
        'path': 'OpenLemur/lemur-70b-chat-v1',
        'tokenizer_path': 'OpenLemur/lemur-70b-chat-v1',
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 8,
        'meta_template': _meta_template,
        'model_kwargs': {'device_map': 'auto', 'trust_remote_code': True},
        'run_cfg': {'num_gpus': 4, 'num_procs': 1},
    },
]
......@@ -30,5 +30,6 @@ models = [
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
end_str='</s>',
)
]
from opencompass.models import VLLM

# Mistral instruct prompt: a leading BOS token, user input wrapped in
# [INST]...[/INST], and the reply terminated by </s> (token id 2).
_meta_template = {
    'begin': '<s>',
    'round': [
        {'role': 'HUMAN', 'begin': '[INST]', 'end': '[/INST]'},
        {'role': 'BOT', 'begin': '', 'end': '</s>', 'generate': True},
    ],
    'eos_token_id': 2,
}

# vLLM-backed deployment; greedy decoding (temperature 0) on one GPU.
models = [
    {
        'type': VLLM,
        'abbr': 'mistral-7b-instruct-v0.2-vllm',
        'path': 'mistralai/Mistral-7B-Instruct-v0.2',
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 32,
        'generation_kwargs': {'temperature': 0},
        'end_str': '</s>',
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
    },
]
......@@ -29,6 +29,7 @@ models = [
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
run_cfg=dict(num_gpus=2, num_procs=1),
end_str='</s>',
)
]
......@@ -19,6 +19,6 @@ models = [
max_out_len=100,
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=1, num_procs=1),
run_cfg=dict(num_gpus=2, num_procs=1),
)
]
from opencompass.models import VLLM

# Mixtral instruct prompt: a leading BOS token, user input wrapped in
# [INST]...[/INST], and the reply terminated by </s> (token id 2).
_meta_template = {
    'begin': '<s>',
    'round': [
        {'role': 'HUMAN', 'begin': '[INST]', 'end': '[/INST]'},
        {'role': 'BOT', 'begin': '', 'end': '</s>', 'generate': True},
    ],
    'eos_token_id': 2,
}

# vLLM-backed deployment; tensor-parallel across 2 GPUs, greedy decoding.
models = [
    {
        'type': VLLM,
        'abbr': 'mixtral-8x7b-instruct-v0.1-vllm',
        'path': 'mistralai/Mixtral-8x7B-Instruct-v0.1',
        'model_kwargs': {'tensor_parallel_size': 2},
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 32,
        'generation_kwargs': {'temperature': 0},
        'end_str': '</s>',
        'run_cfg': {'num_gpus': 2, 'num_procs': 1},
    },
]
......@@ -30,5 +30,6 @@ models = [
max_seq_len=2048,
batch_size=8,
run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|endoftext|>',
)
]
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'),
......@@ -28,5 +29,6 @@ models = [
batch_size=8,
meta_template=_meta_template,
run_cfg=dict(num_gpus=4, num_procs=1),
end_str='<|im_end|>',
)
]
from opencompass.models import VLLM

# ChatML-style turn markers used by Qwen chat models:
# <|im_start|>user / <|im_start|>assistant, each turn closed by <|im_end|>.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': '\n<|im_start|>user\n', 'end': '<|im_end|>'},
        {'role': 'BOT', 'begin': '\n<|im_start|>assistant\n', 'end': '<|im_end|>',
         'generate': True},
    ],
}

# vLLM-backed deployment; tensor-parallel across 4 GPUs, greedy decoding,
# completions trimmed at the ChatML end-of-turn marker.
models = [
    {
        'type': VLLM,
        'abbr': 'qwen-72b-chat-vllm',
        'path': 'Qwen/Qwen-72B-Chat',
        'model_kwargs': {'tensor_parallel_size': 4},
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 32,
        'generation_kwargs': {'temperature': 0},
        'end_str': '<|im_end|>',
        'run_cfg': {'num_gpus': 4, 'num_procs': 1},
    },
]
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role="HUMAN", begin='USER: '),
dict(role="BOT", begin=" ASSISTANT:", end='</s>', generate=True),
],
)
models = [
dict(
......@@ -12,12 +18,13 @@ models = [
truncation_side='left',
use_fast=False,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=8192,
batch_size=8,
model_kwargs=dict(device_map='auto'),
batch_padding=False, # if false, inference with for-loop without batch padding
use_fastchat_template=True,
run_cfg=dict(num_gpus=2, num_procs=1)
run_cfg=dict(num_gpus=2, num_procs=1),
end_str='</s>',
)
]
from opencompass.models import HuggingFaceCausalLM
_meta_template = dict(
round=[
dict(role="HUMAN", begin='USER: '),
dict(role="BOT", begin=" ASSISTANT:", end='</s>', generate=True),
],
)
models = [
dict(
......@@ -12,12 +18,13 @@ models = [
truncation_side='left',
use_fast=False,
),
meta_template=_meta_template,
max_out_len=100,
max_seq_len=8192,
batch_size=8,
model_kwargs=dict(device_map='auto'),
batch_padding=False, # if false, inference with for-loop without batch padding
use_fastchat_template=True,
run_cfg=dict(num_gpus=1, num_procs=1)
run_cfg=dict(num_gpus=1, num_procs=1),
end_str='</s>',
)
]
from opencompass.models import VLLM

# Vicuna v1.5 conversation format: "USER: ..." followed by
# " ASSISTANT:", with the assistant turn ended by </s>.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': 'USER: '},
        {'role': 'BOT', 'begin': ' ASSISTANT:', 'end': '</s>', 'generate': True},
    ],
}

# vLLM-backed deployment of the 13B / 16k-context chat model;
# greedy decoding on 2 GPUs.
models = [
    {
        'type': VLLM,
        'abbr': 'vicuna-13b-v1.5-16k-vllm',
        'path': 'lmsys/vicuna-13b-v1.5-16k',
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 32,
        'generation_kwargs': {'temperature': 0},
        'end_str': '</s>',
        'run_cfg': {'num_gpus': 2, 'num_procs': 1},
    },
]
from opencompass.models import VLLM

# Vicuna v1.5 conversation format: "USER: ..." followed by
# " ASSISTANT:", with the assistant turn ended by </s>.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': 'USER: '},
        {'role': 'BOT', 'begin': ' ASSISTANT:', 'end': '</s>', 'generate': True},
    ],
}

# vLLM-backed deployment of the 7B / 16k-context chat model;
# greedy decoding on a single GPU.
models = [
    {
        'type': VLLM,
        'abbr': 'vicuna-7b-v1.5-16k-vllm',
        'path': 'lmsys/vicuna-7b-v1.5-16k',
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 32,
        'generation_kwargs': {'temperature': 0},
        'end_str': '</s>',
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
    },
]
from opencompass.models import HuggingFaceCausalLM

# WizardLM (Vicuna-style) conversation format: "USER: ... " followed by
# "ASSISTANT: ", with the assistant turn ended by </s>.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': 'USER: ', 'end': ' '},
        {'role': 'BOT', 'begin': 'ASSISTANT: ', 'end': '</s>', 'generate': True},
    ],
}

# 13B checkpoint loaded through HuggingFace transformers with trusted
# remote code; the runner is asked for 2 GPUs and `device_map='auto'`
# places the shards.
models = [
    {
        'type': HuggingFaceCausalLM,
        'abbr': 'wizardlm-13b-v1.2-hf',
        'path': 'WizardLM/WizardLM-13B-V1.2',
        'tokenizer_path': 'WizardLM/WizardLM-13B-V1.2',
        'model_kwargs': {
            'device_map': 'auto',
            'trust_remote_code': True,
        },
        'tokenizer_kwargs': {
            'padding_side': 'left',
            'truncation_side': 'left',
            'trust_remote_code': True,
        },
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 2, 'num_procs': 1},
        'end_str': '</s>',
    },
]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment