"git@developer.sourcefind.cn:cnjsdfcy/simbricks.git" did not exist on "565da1fb1e2638c25644862966b5541f7b54f06f"
Unverified commit 62dbf047, authored by Fengzhe Zhou, committed via GitHub

[Sync] update github workflow (#1156)

parent aa2dd2b5
from opencompass.models import TurboMindModel
settings = [
('qwen-1.8b-turbomind', 'Qwen/Qwen-1_8B', 1),
('qwen-7b-turbomind', 'Qwen/Qwen-7B', 1),
('qwen-14b-turbomind', 'Qwen/Qwen-14B', 1),
('qwen-72b-turbomind', 'Qwen/Qwen-72B', 4),
]
models = []
for abbr, path, num_gpus in settings:
models.append(
dict(
type=TurboMindModel,
abbr=abbr,
path=path,
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
max_out_len=1024,
max_seq_len=2048,
batch_size=16,
concurrency=16,
run_cfg=dict(num_gpus=num_gpus),
)
)
from opencompass.models import LmdeployPytorchModel
settings = [
('yi-6b-pytorch', '01-ai/Yi-6B', 1),
('yi-34b-pytorch', '01-ai/Yi-34B', 2),
]
models = []
for abbr, path, num_gpus in settings:
models.append(
dict(
type=LmdeployPytorchModel,
abbr=abbr,
path=path,
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
max_out_len=1024,
max_seq_len=2048,
batch_size=16,
concurrency=16,
run_cfg=dict(num_gpus=num_gpus),
)
)
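For context, model lists like the two above are normally pulled into a top-level evaluation config via mmengine's `read_base`; a minimal sketch of that wiring follows (the import paths are placeholders, not files from this commit):

from mmengine.config import read_base

with read_base():
    # hypothetical module paths; point these at wherever the two model lists live
    from .models.qwen.lmdeploy_qwen import models as turbomind_qwen_models
    from .models.yi.pytorch_yi import models as pytorch_yi_models

# concatenate the per-engine lists into the final `models` used by the run
models = [*turbomind_qwen_models, *pytorch_yi_models]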
@@ -2,7 +2,7 @@
from mmengine.config import read_base
with read_base():
from .groups.cibench import cibench_summary_groups
from .groups.legacy.cibench import cibench_summary_groups
from .groups.plugineval import plugineval_summary_groups
......
_cibench = ['Pandas', 'Matplotlib', 'Opencv', 'SciPy', 'Seaborn', 'PyTorch']
_cibench = ['cibench_' + i for i in _cibench]
cibench_summary_groups = [{'name': 'cibench', 'subsets': _cibench}]
_cibench_template = ['lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
'scipy', 'seaborn', 'sklearn', 'tensorflow']
_cibench_template = ['cibench_template/' + i for i in _cibench_template]
# number of questions per library for each metric, ordered as [executable, numeric_correct, text_score, vis_sim]
_cibench_template_weight = {
'lightgbm': [30, 15, 0, 0],
'matplotlib': [42, 0, 0, 36],
'nltk': [70, 30, 20, 10],
'opencv': [60, 10, 0, 40],
'pandas': [60, 40, 0, 10],
'pytorch': [28, 0, 0, 0],
'scipy': [60, 40, 0, 0],
'seaborn': [42, 0, 0, 35],
'sklearn': [42, 6, 0, 18],
'tensorflow': [36, 6, 0, 12],
}
cibench_summary_groups.extend([
{
'name': 'cibench_template:executable',
'subsets': [[i, 'executable'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[0] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[1] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:text_score',
'subsets': [[i, 'text_score'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[2] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[3] for k,v in _cibench_template_weight.items()},
},
])
## chinese
_cibench_template_cn = ['lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
'scipy', 'seaborn', 'sklearn', 'tensorflow']
_cibench_template_cn = ['cibench_template_chinese/' + i for i in _cibench_template_cn]
cibench_summary_groups.extend([
{
'name': 'cibench_template_cn:executable',
'subsets': [[i, 'executable'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[0] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[1] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:text_score',
'subsets': [[i, 'text_score'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[2] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[3] for k,v in _cibench_template_weight.items()},
},
])
## add more without nltk
cibench_summary_groups.extend([
{
'name': 'cibench_template_wo_nltk:executable',
'subsets': [[i, 'executable'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[0] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_wo_nltk:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[1] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_wo_nltk:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[3] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
])
cibench_summary_groups.extend([
{
'name': 'cibench_template_cn_wo_nltk:executable',
'subsets': [[i, 'executable'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[0] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_cn_wo_nltk:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[1] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_cn_wo_nltk:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[3] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
])
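Assuming the summarizer treats `weights` as per-subset question counts (which is what the comment above suggests), each group score reduces to a weighted mean; a standalone sketch with made-up subset scores:

# hypothetical per-subset accuracies; weights taken from the table above
subset_scores = {'cibench_template/lightgbm': 80.0, 'cibench_template/matplotlib': 65.0}
weights = {'cibench_template/lightgbm': 30, 'cibench_template/matplotlib': 42}

total = sum(weights[k] for k in subset_scores)
group_score = sum(subset_scores[k] * weights[k] for k in subset_scores) / total
print(round(group_score, 2))  # 71.25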
@@ -170,6 +170,8 @@ def parse_dlc_args(dlc_parser):
type=str)
def parse_hf_args(hf_parser):
"""These args are all for the quick construction of HuggingFace models."""
hf_parser.add_argument('--hf-type', type=str, choices=['base', 'chat'], default='chat', help='The type of the HuggingFace model, base or chat')
@@ -212,7 +214,7 @@ def main():
if args.work_dir is not None:
cfg['work_dir'] = args.work_dir
else:
cfg.setdefault('work_dir', osp.join('outputs', 'default'))
cfg.setdefault('work_dir', os.path.join('outputs', 'default'))
# cfg_time_str defaults to the current time
cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S')
@@ -340,5 +342,6 @@ def main():
summarizer.summarize(time_str=cfg_time_str)
if __name__ == '__main__':
main()
@@ -7,7 +7,8 @@ from .base import BaseModel, LMTemplateParser # noqa: F401
from .base_api import APITemplateParser, BaseAPIModel # noqa: F401
from .bytedance_api import ByteDance # noqa: F401
from .claude_api import Claude # noqa: F401
from .gemini_api import Gemini, GeminiAllesAPIN # noqa: F401
from .deepseek_api import DeepseekAPI # noqa: F401
from .gemini_api import Gemini # noqa: F401
from .glm import GLM130B # noqa: F401
from .huggingface import HuggingFace # noqa: F401
from .huggingface import HuggingFaceCausalLM # noqa: F401
@@ -21,7 +22,7 @@ from .lightllm_api import LightllmAPI # noqa: F401
from .llama2 import Llama2, Llama2Chat # noqa: F401
from .lmdeploy_pytorch import LmdeployPytorchModel # noqa: F401
from .lmdeploy_tis import LmdeployTisModel # noqa: F401
from .minimax_api import MiniMax # noqa: F401
from .minimax_api import MiniMax, MiniMaxChatCompletionV2 # noqa: F401
from .mistral_api import Mistral # noqa: F401
from .mixtral import Mixtral # noqa: F401
from .modelscope import ModelScope, ModelScopeCausalLM # noqa: F401
@@ -31,11 +32,12 @@ from .openai_api import OpenAI # noqa: F401
from .pangu_api import PanGu # noqa: F401
from .qwen_api import Qwen # noqa: F401
from .sensetime_api import SenseTime # noqa: F401
from .stepfun_api import StepFun # noqa: F401
from .turbomind import TurboMindModel # noqa: F401
from .turbomind_tis import TurboMindTisModel # noqa: F401
from .unigpt_api import UniGPT # noqa: F401
from .vllm import VLLM # noqa: F401
from .xunfei_api import XunFei # noqa: F401
from .xunfei_api import XunFei, XunFeiSpark # noqa: F401
from .yayi_api import Yayi # noqa: F401
from .zhipuai_api import ZhiPuAI # noqa: F401
from .zhipuai_v2_api import ZhiPuV2AI # noqa: F401
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
@@ -141,29 +140,32 @@ class AI360GPT(BaseAPIModel):
self.wait()
continue
if raw_response.status_code == 200:
try:
msg = response['choices'][0]['message']['content'].strip()
return msg
except KeyError:
if 'error' in response:
# tpm (tokens per minute) limit
if response['erro']['code'] == '1005':
time.sleep(1)
continue
self.logger.error('Find error message in response: ',
str(response['error']))
msg = response['choices'][0]['message']['content'].strip()
self.logger.debug(f'Generated: {msg}')
return msg
# sensitive content, prompt overlength, network error
# or illegal prompt
if (raw_response.status_code == 400
or raw_response.status_code == 401
or raw_response.status_code == 402
or raw_response.status_code == 429
or raw_response.status_code == 500):
print(raw_response.text)
continue
if raw_response.status_code in [400, 401, 402, 429, 500]:
if 'error' not in response:
print(raw_response.status_code)
print(raw_response.text)
continue
print(response)
# tpm (tokens per minute) limit
if response['error']['code'] == '1005':
self.logger.debug('tpm limit, ignoring')
continue
elif response['error']['code'] == '1001':
msg = '参数错误:messages参数过长或max_tokens参数值过大'  # parameter error: messages too long or max_tokens too large
self.logger.debug(f'Generated: {msg}')
return msg
else:
print(response)
self.logger.error('Find error message in response: ',
str(response['error']))
print(raw_response)
max_num_retries += 1
......
@@ -145,8 +145,8 @@ class BaiChuan(BaseAPIModel):
self.wait()
continue
if raw_response.status_code == 200:
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
if raw_response.status_code != 200:
......
@@ -53,6 +53,8 @@ class ERNIEBot(BaseAPIModel):
self.headers = {'Content_Type': 'application/json'}
self.secretkey = secretkey
self.key = key
if not url.endswith('?access_token='):
url += '?access_token='
self.url = url
access_token, _ = self._generate_access_token()
self.access_token = access_token
@@ -143,14 +145,25 @@ class ERNIEBot(BaseAPIModel):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
elif item['role'] == 'BOT':
msg['role'] = 'assistant'
if item['role'] == 'BOT':
role = 'assistant'
else: # USER or SYSTEM
role = 'user'
if role != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = role
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
messages.append(msg)
data = {'messages': messages}
data.update(self.generation_kwargs)
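The buffering loop above collapses consecutive turns with the same role into one message, so the API always receives alternating user/assistant turns; the same pattern is reused in the new DeepseekAPI, MiniMaxChatCompletionV2 and StepFun wrappers below. A standalone sketch of the merging, with made-up turns:

input = [
    {'role': 'SYSTEM', 'prompt': 'You are helpful.'},
    {'role': 'HUMAN', 'prompt': 'Hi'},
    {'role': 'BOT', 'prompt': 'Hello!'},
    {'role': 'HUMAN', 'prompt': 'Part 1'},
    {'role': 'HUMAN', 'prompt': 'Part 2'},  # same role as previous turn -> merged
]

messages, msg_buffer, last_role = [], [], None
for item in input:
    role = 'assistant' if item['role'] == 'BOT' else 'user'  # SYSTEM/HUMAN -> user
    if role != last_role and last_role is not None:
        messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
        msg_buffer = []
    msg_buffer.append(item['prompt'])
    last_role = role
messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
# messages == [
#     {'content': 'You are helpful.\nHi', 'role': 'user'},
#     {'content': 'Hello!', 'role': 'assistant'},
#     {'content': 'Part 1\nPart 2', 'role': 'user'},
# ]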
@@ -181,6 +194,7 @@ class ERNIEBot(BaseAPIModel):
if raw_response.status_code == 200:
try:
msg = response['result']
self.logger.debug(msg)
return msg
except KeyError:
print(response)
@@ -188,9 +202,12 @@
if response['error_code'] == 336007:
# exceed max length
return ''
time.sleep(1)
continue
elif response['error_code'] == 336103:
# prompt tokens too long
return ''
else:
time.sleep(1)
continue
if (response['error_code'] == 110 or response['error_code'] == 100
or response['error_code'] == 111
......
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
class DeepseekAPI(BaseAPIModel):
"""Model wrapper around DeepseekAPI.
Documentation:
Args:
path (str): The name of the Deepseek model to call, e.g. `deepseek-chat`.
key (str): Authorization key.
url (str): The API endpoint to request.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
system_prompt: str = '',
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
self.system_prompt = system_prompt
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
if self.system_prompt:
system = {'role': 'system', 'content': self.system_prompt}
messages.insert(0, system)
data = {'model': self.model, 'messages': messages}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
try:
response = raw_response.json()
except Exception as err:
print('Response Error:{}'.format(err))
response = None
self.release()
if response is None:
print('Connection error, reconnect.')
# on connection errors, rapid retries tend to keep the
# network unstable, so wait here to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
# msg = json.load(response.text)
# response
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
if raw_response.status_code == 401:
print('请求被拒绝 api_key错误')  # request rejected: invalid api_key
continue
elif raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)  # request failed, status code:
msg = 'The request was rejected because high risk'
return msg
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
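A hypothetical model entry using the new wrapper might look like the sketch below (the abbr, key and url are placeholders, not values from this commit; StepFun and MiniMaxChatCompletionV2 are configured the same way):

from opencompass.models import DeepseekAPI

models = [
    dict(
        type=DeepseekAPI,
        abbr='deepseek-chat-api',           # placeholder abbreviation
        path='deepseek-chat',               # model name, forwarded as `path`
        key='YOUR_API_KEY',                 # supply a real key via your own config
        url='https://api.deepseek.com/chat/completions',  # assumed endpoint
        query_per_second=2,
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
    )
]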
@@ -186,66 +186,3 @@ class Gemini(BaseAPIModel):
time.sleep(1)
raise RuntimeError('API call failed.')
class GeminiAllesAPIN(Gemini):
"""Model wrapper around Gemini models.
Documentation:
Args:
path (str): The name of Gemini model.
e.g. `gemini-pro`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
temperature: float = 1.0,
top_p: float = 0.8,
top_k: float = 10.0,
):
super().__init__(key=key,
path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
# Replace the url and headers into AllesApin
self.url = url
self.headers = {
'alles-apin-token': key,
'content-type': 'application/json',
}
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
return super().generate(inputs, max_out_len)
@@ -289,13 +289,13 @@ class HuggingFace(BaseModel):
tokens = self.tokenizer.batch_encode_plus(inputs,
padding=True,
truncation=True,
max_length=self.max_seq_len -
max_out_len)
max_length=self.max_seq_len)
tokens = {
k: torch.tensor(np.array(tokens[k]), device=self.model.device)
for k in tokens if k in ['input_ids', 'attention_mask']
}
origin_stopping_criteria = stopping_criteria
if stopping_criteria:
# Construct huggingface stopping criteria
if self.tokenizer.eos_token is not None:
@@ -332,6 +332,9 @@ class HuggingFace(BaseModel):
if self.end_str:
decodeds = [token.split(self.end_str)[0] for token in decodeds]
if origin_stopping_criteria:
for t in origin_stopping_criteria:
decodeds = [token.split(t)[0] for token in decodeds]
return decodeds
def _single_generate(self,
@@ -382,6 +385,7 @@ class HuggingFace(BaseModel):
max_length=self.max_seq_len -
max_out_len)['input_ids']
input_ids = torch.tensor(input_ids, device=self.model.device)
origin_stopping_criteria = stopping_criteria
if stopping_criteria:
# Construct huggingface stopping criteria
if self.tokenizer.eos_token is not None:
@@ -419,6 +423,9 @@ class HuggingFace(BaseModel):
if self.end_str:
decodeds = [token.split(self.end_str)[0] for token in decodeds]
if origin_stopping_criteria:
for t in origin_stopping_criteria:
decodeds = [token.split(t)[0] for token in decodeds]
return decodeds
def get_logits(self, inputs: List[str]):
......
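The two `origin_stopping_criteria` additions post-trim the decoded text on every stop string, mirroring what `end_str` already did; in isolation the trimming is just:

# standalone illustration of the stop-string trimming added above
decodeds = ['Answer: A\nQuestion: next item', 'Answer: B']
stopping_criteria = ['Question:']
for t in stopping_criteria:
    decodeds = [token.split(t)[0] for token in decodeds]
print(decodeds)  # ['Answer: A\n', 'Answer: B']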
@@ -180,3 +180,173 @@ class MiniMax(BaseAPIModel):
max_num_retries += 1
raise RuntimeError(response.text)
class MiniMaxChatCompletionV2(BaseAPIModel):
"""Model wrapper around MiniMax ChatCompletionV2.
Documentation:
Args:
path (str): The name of the MiniMax model to call, e.g. `abab5.5-chat`.
key (str): Authorization key.
url (str): The API endpoint to request.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
data = {
'model': self.model,
'messages': messages,
'max_tokens': max_out_len
}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
response = raw_response.json()
self.release()
if response is None:
print('Connection error, reconnect.')
# on connection errors, rapid retries tend to keep the
# network unstable, so wait here to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
try:
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
except Exception:
code = response.get('base_resp', {}).get('status_code')
if code == 1002:
# rate limit
time.sleep(1)
continue
elif code == 1027:
return 'The request was rejected because high risk'
print(messages, response)
pass
elif raw_response.status_code == 401:
print('请求被拒绝 api_key错误')  # request rejected: invalid api_key
continue
elif raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)  # request failed, status code:
msg = 'The request was rejected because high risk'
return msg
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
@@ -152,8 +152,7 @@ class Qwen(BaseAPIModel):
if response.status_code == 200:
try:
msg = response.output.text
print('=' * 128)
print(msg)
self.logger.debug(msg)
return msg
except KeyError:
print(response)
......
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
class StepFun(BaseAPIModel):
"""Model wrapper around StepFun.
Documentation:
Args:
path (str): The name of the StepFun model to call.
key (str): Authorization key.
url (str): The API endpoint to request.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
system_prompt: str = '',
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
self.system_prompt = system_prompt
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
if self.system_prompt:
system = {'role': 'system', 'content': self.system_prompt}
messages.insert(0, system)
data = {'model': self.model, 'messages': messages}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
try:
response = raw_response.json()
except Exception:
response = None
self.release()
if response is None:
print('Connection error, reconnect.')
# on connection errors, rapid retries tend to keep the
# network unstable, so wait here to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
# msg = json.load(response.text)
# response
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
if raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)  # request failed, status code:
msg = 'The context length exceeded'
return msg
elif raw_response.status_code == 403:
print('请求被拒绝 api_key错误')  # request rejected: invalid api_key
continue
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
elif raw_response.status_code == 451:
print(messages, response)
print('请求失败,状态码:', raw_response)
msg = 'The request was rejected because high risk'
return msg
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
@@ -55,9 +55,6 @@ class TurboMindModel(BaseModel):
if engine_config is not None:
from lmdeploy.messages import TurbomindEngineConfig
engine_config = TurbomindEngineConfig(**engine_config)
if gen_config is not None:
from lmdeploy.messages import EngineGenerationConfig
gen_config = EngineGenerationConfig(**gen_config)
self.logger = get_logger()
tm_model = TurboMind.from_pretrained(path, engine_config=engine_config)
self.tokenizer = tm_model.tokenizer
@@ -106,6 +103,7 @@ class TurboMindModel(BaseModel):
t = self.tokenizer.encode(t, add_bos=False)
stop_words.append(t[0])
gen_config['stop_words'] = list(set(stop_words))
gen_config.setdefault('min_new_tokens', 1)
from lmdeploy.messages import EngineGenerationConfig
gen_config = EngineGenerationConfig(**gen_config)
@@ -123,6 +121,9 @@ class TurboMindModel(BaseModel):
[gen_config] * len(batch_input),
))
results += _results
if stopping_criteria:
for s in stopping_criteria:
results = [r.split(s)[0] for r in results]
return results
def get_token_len(self, prompt: str) -> int:
......
import json
import re
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
@@ -221,3 +223,150 @@ class XunFei(BaseAPIModel):
if err_code == 10013:
return err_data['header']['message']
raise RuntimeError(f'Code: {err_code}, data: {err_data}')
class XunFeiSpark(BaseAPIModel):
"""Model wrapper around XunFeiSpark.
Documentation:
Args:
path (str): The Spark LLM domain, passed to ChatSparkLLM as `spark_llm_domain`.
url (str): The Spark API url.
app_id (str): Application id used for authentication.
api_key (str): API key used for authentication.
api_secret (str): API secret used for authentication.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
url: str,
app_id: str,
api_key: str,
api_secret: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
try:
from sparkai.llm.llm import ChatSparkLLM # noqa: F401
except ImportError:
raise ImportError('run `pip install --upgrade spark_ai_python`')
self.spark_domain = path
self.url = url
self.app_id = app_id
self.api_key = api_key
self.api_secret = api_secret
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
results = [self._generate(input, max_out_len) for input in inputs]
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
assert isinstance(input, (str, PromptList))
from sparkai.core.messages import ChatMessage
from sparkai.llm.llm import ChatSparkLLM
if isinstance(input, str):
messages = [ChatMessage(role='user', content=input)]
else:
messages = []
msg_buffer, last_role = [], None
for index, item in enumerate(input):
if index == 0 and item['role'] == 'SYSTEM':
role = 'system'
elif item['role'] == 'BOT':
role = 'assistant'
else:
role = 'user'
if role != last_role and last_role is not None:
content = '\n'.join(msg_buffer)
messages.append(
ChatMessage(role=last_role, content=content))
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = role
content = '\n'.join(msg_buffer)
messages.append(ChatMessage(role=last_role, content=content))
spark = ChatSparkLLM(
spark_api_url=self.url,
spark_app_id=self.app_id,
spark_api_key=self.api_key,
spark_api_secret=self.api_secret,
spark_llm_domain=self.spark_domain,
streaming=False,
max_tokens=max_out_len,
)
all_empty_response = True
for _ in range(self.retry + 1):
try:
outputs = spark.generate([messages]).generations[0]
if len(outputs) == 0:
self.logger.error('Empty response, retrying...')
continue
msg = outputs[0].text
self.logger.debug(f'Generated: {msg}')
return msg
except ConnectionError as e:
match = re.match(r'Error Code: (\d+), Error: (.*)',
e.args[0],
flags=re.DOTALL)
if match:
error_code = int(match.group(1))
msg = match.group(2)
if error_code == 10003: # query data exceed limit
self.logger.error(f'Error {error_code}: {msg}')
return msg
elif error_code in [10013, 10014]: # skip safety problem
self.logger.debug(f'Generated: {msg}')
return msg
elif error_code == 10020: # plugin result is empty
self.logger.error(f'Error {error_code}: {msg}')
return msg
elif error_code == 11202: # qps limit
time.sleep(1)
else:
self.logger.error(f'Error {error_code}: {msg}')
raise e
raise e
except TimeoutError:
self.logger.error('TimeoutError, sleep 60, retrying...')
time.sleep(60)
except Exception as e:
self.logger.error(str(e))
pass
all_empty_response = False
if all_empty_response:
self.logger.error('All empty response')
return 'all empty response'
raise RuntimeError('Failed to generate response')
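The retry loop above distinguishes Spark error codes by parsing the exception message; a standalone check of that parsing, with a made-up error string:

import re

err = 'Error Code: 11202, Error: qps limit exceeded'  # hypothetical message
match = re.match(r'Error Code: (\d+), Error: (.*)', err, flags=re.DOTALL)
if match:
    error_code, msg = int(match.group(1)), match.group(2)
    print(error_code, msg)  # 11202 qps limit exceeded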
@@ -141,7 +141,7 @@ class DLCRunner(BaseRunner):
hf_offline = self.aliyun_cfg.get('hf_offline', True)
if hf_offline:
shell_cmd += 'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; ' # noqa: E501
shell_cmd += 'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; export HF_HUB_OFFLINE=1; ' # noqa: E501
http_proxy = self.aliyun_cfg.get('http_proxy')
if http_proxy is not None:
@@ -158,6 +158,7 @@ class DLCRunner(BaseRunner):
shell_cmd += f'export {extra_env}; '
shell_cmd += f'cd {pwd}; '
shell_cmd += 'umask 0000; '
shell_cmd += '{task_cmd}'
tmpl = ('dlc create job'
@@ -195,7 +196,10 @@
index_to_start = 0
while index_to_start < num_retry_to_start:
index_to_start += 1
output = subprocess.getoutput(cmd)
try:
output = subprocess.getoutput(cmd)
except BlockingIOError:
output = ''
match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
if match is None:
stdout.write('Failed to get job id from output:')
@@ -264,7 +268,10 @@
f" -c {self.aliyun_cfg['dlc_config_path']}"
f' --start_time {pri_time}'
f' --end_time {cur_time}')
log_output = subprocess.getoutput(logs_cmd)
try:
log_output = subprocess.getoutput(logs_cmd)
except BlockingIOError:
log_output = '[WARN] No logs found for the pod'
if '[WARN] No logs found for the pod' not in log_output:
pri_time = cur_time
......
@@ -46,17 +46,19 @@ class LocalRunner(BaseRunner):
lark_bot_url (str): Lark bot url.
"""
def __init__(
self,
task: ConfigDict,
max_num_workers: int = 16,
debug: bool = False,
max_workers_per_gpu: int = 1,
lark_bot_url: str = None,
):
def __init__(self,
task: ConfigDict,
max_num_workers: int = 16,
debug: bool = False,
max_workers_per_gpu: int = 1,
lark_bot_url: str = None,
**kwargs):
super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
self.max_num_workers = max_num_workers
self.max_workers_per_gpu = max_workers_per_gpu
logger = get_logger()
for k, v in kwargs.items():
logger.warning(f'Ignored argument in {self.__module__}: {k}={v}')
def launch(self, tasks: List[Dict[str, Any]]) -> List[Tuple[str, int]]:
"""Launch multiple tasks.
......
@@ -94,11 +94,11 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
f'答案是\s?(\S+)(?:。|$)',
f'答案应该是\s?(\S+)(?:。|$)',
f'答案为\s?(\S+)(?:。|$)',
f'[Tt]he answer is \(?([{options}])\)?',
f'[Tt]he answer is option \(?([{options}])\)?',
f'[Tt]he correct answer is \(?([{options}])\)?',
f'[Tt]he correct answer is option \(?([{options}])\)?',
f'[Tt]he answer to the question is \(?([{options}])\)?',
f'[Tt]he answer is:?\s+\(?([{options}])\)?',
f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
f'[Tt]he correct answer is option:?\s+\(?([{options}])\)?',
f'[Tt]he answer to the question is:?\s+\(?([{options}])\)?',
f'^选项\s?([{options}])',
f'^([{options}])\s?选?项',
f'(\s|^)[{options}][\s。,,::\.$]',
@@ -116,7 +116,7 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
if cushion:
patterns.extend(cushion_patterns)
for pattern in patterns:
match = re.search(pattern, text)
match = re.search(pattern, text, re.DOTALL)
if match:
outputs = match.group(0)
for i in options:
......
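The loosened patterns (optional colon, flexible whitespace) let the extractor catch phrasings such as 'The answer is: B', and the search now passes `re.DOTALL` so `.` in any pattern can also span newlines. A quick standalone check of one of the new patterns (not the full helper):

import re

options = 'ABCD'
pattern = f'[Tt]he answer is:?\\s+\\(?([{options}])\\)?'
text = 'Let me think.\nThe answer is:\n(B) because ...'
match = re.search(pattern, text, re.DOTALL)
print(match.group(1))  # B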