Unverified Commit b39f5015 authored by Fengzhe Zhou, committed by GitHub

[Sync] update taco (#1030)

parent 16f29b25
@@ -48,13 +48,13 @@ class Qwen(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -71,13 +71,13 @@ class Qwen(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -103,16 +103,26 @@ class Qwen(BaseAPIModel):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
for item in input:
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
msg_buffer, last_role = [], None
for index, item in enumerate(input):
if index == 0 and item['role'] == 'SYSTEM':
role = 'system'
elif item['role'] == 'BOT':
msg['role'] = 'assistant'
elif item['role'] == 'SYSTEM':
msg['role'] = 'system'
messages.append(msg)
role = 'assistant'
else:
role = 'user'
if role != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = role
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
data = {'messages': messages}
data.update(self.generation_kwargs)
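Editor's note: the new buffering loop above collapses consecutive same-role turns into a single message, which chat endpoints typically expect to alternate; only a leading SYSTEM item keeps the system role. A minimal standalone sketch of the same logic (function name and sample turns are illustrative):

```python
# Sketch of the role-merging loop above; merge_messages is an
# illustrative name, not part of the actual class.
def merge_messages(items):
    messages, msg_buffer, last_role = [], [], None
    for index, item in enumerate(items):
        if index == 0 and item['role'] == 'SYSTEM':
            role = 'system'
        elif item['role'] == 'BOT':
            role = 'assistant'
        else:
            role = 'user'
        if role != last_role and last_role is not None:
            # Flush the previous run of same-role prompts as one message.
            messages.append({'content': '\n'.join(msg_buffer),
                             'role': last_role})
            msg_buffer = []
        msg_buffer.append(item['prompt'])
        last_role = role
    messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
    return messages

print(merge_messages([{'role': 'HUMAN', 'prompt': 'Hello'},
                      {'role': 'HUMAN', 'prompt': 'Ping?'},
                      {'role': 'BOT', 'prompt': 'Pong.'}]))
# [{'content': 'Hello\nPing?', 'role': 'user'},
#  {'content': 'Pong.', 'role': 'assistant'}]
```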
@@ -142,6 +152,8 @@ class Qwen(BaseAPIModel):
if response.status_code == 200:
try:
msg = response.output.text
print('=' * 128)
print(msg)
return msg
except KeyError:
print(response)
@@ -153,6 +165,8 @@ class Qwen(BaseAPIModel):
time.sleep(2)
continue
if response.status_code == 400:
print('=' * 128)
print(response)
msg = 'Output data may contain inappropriate content.'
return msg
@@ -61,13 +61,13 @@ class SenseTime(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -84,13 +84,13 @@ class SenseTime(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -114,7 +114,8 @@ class SenseTime(BaseAPIModel):
messages.append(msg)
data = {'messages': messages, 'model': self.model}
data.update(self.params)
if self.params is not None:
data.update(self.params)
stream = data['stream']
@@ -123,10 +124,14 @@ class SenseTime(BaseAPIModel):
self.acquire()
max_num_retries += 1
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception:
time.sleep(1)
continue
requests_id = raw_response.headers['X-Request-Id'] # noqa
self.release()
import copy
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
@@ -70,11 +71,10 @@ class TurboMindModel(BaseModel):
self.gen_config = gen_config
self.end_str = end_str
def generate(
self,
inputs: List[str],
max_out_len: int = 512,
) -> List[str]:
def generate(self,
inputs: List[str],
max_out_len: int = 512,
**kwargs) -> List[str]:
"""Generate results given a list of inputs.
Args:
@@ -93,6 +93,15 @@ class TurboMindModel(BaseModel):
inputs[i:i + batch_size] for i in range(0, len(inputs), batch_size)
]
gen_config = copy.deepcopy(self.gen_config)
if 'do_sample' in kwargs:
if kwargs['do_sample']:
gen_config.top_k = 1000
gen_config.temperature = kwargs.get('temperature', 1)
else:
gen_config.top_k = 1
gen_config.temperature = 0.01
results = []
for batch_input in batch_inputs:
with ThreadPoolExecutor() as executor:
@@ -103,7 +112,7 @@ class TurboMindModel(BaseModel):
self.generator_ids[:len(batch_input)],
batch_input,
[max_out_len] * len(batch_input),
[self.gen_config] * len(batch_input),
[gen_config] * len(batch_input),
[self.end_str] * len(batch_input),
))
results += _results
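Editor's note: the `do_sample` branch translates a HuggingFace-style sampling flag into TurboMind generation settings: sampling widens `top_k` to 1000 and takes the caller's `temperature`, while `do_sample=False` approximates greedy decoding with `top_k=1` and a near-zero temperature. A rough sketch of the mapping, with a plain dataclass standing in for lmdeploy's real config object:

```python
import copy
from dataclasses import dataclass

@dataclass
class GenConfig:  # stand-in for lmdeploy's generation config
    top_k: int = 40
    temperature: float = 0.8

def resolve_gen_config(base: GenConfig, **kwargs) -> GenConfig:
    gen_config = copy.deepcopy(base)  # leave the shared default untouched
    if 'do_sample' in kwargs:
        if kwargs['do_sample']:
            gen_config.top_k = 1000
            gen_config.temperature = kwargs.get('temperature', 1)
        else:
            gen_config.top_k = 1  # effectively greedy decoding
            gen_config.temperature = 0.01
    return gen_config

print(resolve_gen_config(GenConfig(), do_sample=False))
# GenConfig(top_k=1, temperature=0.01)
```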
@@ -123,14 +132,14 @@ class TurboMindModel(BaseModel):
def _generate(self,
generator,
session_id,
prompt: str or PromptList,
prompt: PromptType,
max_out_len: int,
gen_config=None,
end_str: Optional[str] = None) -> str:
"""Generate results given a list of inputs.
Args:
prompt (str or PromptList): A string or PromptDict.
prompt (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -189,3 +198,22 @@ class TurboMindModel(BaseModel):
results.append(res)
results = np.concatenate(results)
return results
def get_loglikelihood(
self,
inputs: List[str],
conts: List[str],
mask_length: Optional[List[int]] = None) -> List[float]:
assert isinstance(
inputs, List), f'List(str) is expected, but got {type(inputs)}'
results = []
for text, cont in zip(inputs, conts):
input_ids = self.tokenizer.encode(text)
res = self.generators[0].get_ppl(input_ids)
logit_sum = res * len(input_ids)
input_ids = self.tokenizer.encode(text.replace(cont, ''))
res = self.generators[0].get_ppl(input_ids)
logit_part = res * len(input_ids)
results.append(-(logit_sum - logit_part))
results = np.concatenate(results)
return results
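Editor's note: the new `get_loglikelihood` recovers the continuation's log-probability from two perplexity calls. Assuming `get_ppl` returns the mean negative log-likelihood per token, multiplying by the token count gives a total NLL, and subtracting the prefix's total isolates the continuation. Note that `text.replace(cont, '')` strips every occurrence of `cont`, so this relies on the continuation appearing exactly once, at the end. A toy recomputation of the arithmetic (numbers invented):

```python
# Assuming get_ppl returns the mean NLL per token of a token sequence.
full_nll, full_len = 2.0, 10   # tokenized text = prefix + cont
pref_nll, pref_len = 1.8, 7    # tokenized text with cont removed

logit_sum = full_nll * full_len    # total NLL of the full text: 20.0
logit_part = pref_nll * pref_len   # total NLL of the prefix: 12.6
print(-(logit_sum - logit_part))   # log p(cont | prefix) ≈ -7.4
```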
@@ -60,14 +60,14 @@ class TurboMindAPIModel(BaseModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
temperature: float = 1.0,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -102,12 +102,12 @@ class TurboMindAPIModel(BaseModel):
"""
return self.token_bucket.get_token()
def _generate(self, prompt: str or PromptList, max_out_len: int,
def _generate(self, prompt: PromptType, max_out_len: int,
temperature: float, end_str: str) -> str:
"""Generate results given a list of inputs.
Args:
prompt (str or PromptList): A string or PromptDict.
prompt (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -58,14 +58,14 @@ class TurboMindTisModel(BaseModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
temperature: float = 1.0,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -96,12 +96,12 @@ class TurboMindTisModel(BaseModel):
"""
return self.token_bucket.get_token()
def _generate(self, prompt: str or PromptList, max_out_len: int,
def _generate(self, prompt: PromptType, max_out_len: int,
temperature: float) -> str:
"""Generate results given a list of inputs.
Args:
prompt (str or PromptList): A string or PromptDict.
prompt (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
import hashlib
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
def get_sign(appkey, udid, timestamp, secret):
original_str = f'{appkey}{udid}{timestamp}{secret}'
sign = ''
try:
md = hashlib.sha256()
md.update(original_str.encode('utf-8'))
bytes_result = md.digest()
for byte in bytes_result:
hex_value = format(byte, '02X')
sign += hex_value.upper()
except Exception as e:
print(e)
return sign
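Editor's note: the byte-by-byte hex loop in `get_sign` is equivalent to hashlib's built-in hex digest, uppercased; a quick self-check with dummy credentials:

```python
import hashlib

appkey, udid, timestamp, secret = 'ak', 'udid', '0', 'sk'  # dummy values
expected = hashlib.sha256(
    f'{appkey}{udid}{timestamp}{secret}'.encode('utf-8')).hexdigest().upper()
assert get_sign(appkey, udid, timestamp, secret) == expected
```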
class UniGPT(BaseAPIModel):
def __init__(
self,
path: str,
appkey: str,
secret: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
temperature: float = 0.2,
): # noqa E125
super().__init__(
path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry,
)
self.appkey = appkey
self.secret = secret
self.udid = str(uuid.uuid1())
self.url = url
self.model = path
self.temperature = temperature
def generate(self,
inputs: List[PromptType],
max_out_len: int = 512) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(self, input: PromptType, max_out_len: int = 512) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
for item in input:
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
elif item['role'] == 'BOT':
msg['role'] = 'assistant'
elif item['role'] == 'SYSTEM':
msg['role'] = 'system'
messages.append(msg)
data = {
'model': self.path,
'temperature': self.temperature,
'messages': messages,
'max_tokens': max_out_len,
}
timestamp = str(int(time.time()) * 1000)
headers = {
'appkey': self.appkey,
'sign': get_sign(self.appkey, self.udid, timestamp, self.secret),
'stream': 'false',
'timestamp': timestamp,
'udid': self.udid,
'censor': 'none',
}
for _ in range(self.retry):
try:
response = requests.post(self.url, json=data, headers=headers)
except Exception as e:
print(e)
continue
if response is None or response.status_code != 200:
code = response.status_code if response else -1
print(f'request err, status_code: {code}')
time.sleep(10)
continue
try:
response = response.json()
except Exception as e:
print(e)
continue
print(response)
if response.get('errorCode') == '8500502':
return 'context_length_exceeded'
return response['result']['choices'][0]['message']['content']
raise RuntimeError(f'Failed to respond in {self.retry} retries')
@@ -98,13 +98,13 @@ class XunFei(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -121,13 +121,13 @@ class XunFei(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> List[str]:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
import base64
import hashlib
import hmac
import random
import string
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
def generate_random_string(length=16):
"""生成随机串.
:param length: 随机串长度,默认为 16
:return: 随机串
"""
letters = string.ascii_letters + string.digits
rand_str = ''.join(random.choice(letters) for i in range(length))
return rand_str
def get_current_time(format='%Y-%m-%d %H:%M:%S'):
"""获取当前时间.
:param format: 时间格式,默认为 '%H:%M:%S'
:return: 当前时间字符串
"""
now = datetime.now()
time_str = now.strftime(format)
return time_str
def get_current_timestamp():
"""
获取当前时间时间戳
:return:
"""
timestamp_str = int(round(time.time() * 1000))
return str(timestamp_str)
def encode_base64_string(s):
"""对字符串进行 Base64 编码.
:param s: 字符串
:return: 编码后的字符串
"""
encoded = base64.b64encode(s).decode()
return encoded
def get_current_time_gmt_format():
"""
获取当前时间的GMT 时间
:return:
"""
GMT_FORMAT = '%a, %d %b %Y %H:%M:%SGMT+00:00'
now = datetime.now()
time_str = now.strftime(GMT_FORMAT)
return time_str
class Yayi(BaseAPIModel):
"""Model wrapper around SenseTime.
Args:
path (str): The name of SenseTime model.
e.g. `nova-ptc-xl-v1`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
url: str,
url_path: str,
x_tilake_app_key: str,
x_tilake_app_secret: str,
x_tilake_ca_sginature_method: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
temperature: float = 0.4,
):
super().__init__(
path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry,
)
self.url = url
self.url_path = url_path
self.X_TILAKE_APP_KEY = x_tilake_app_key
self.X_TILAKE_APP_SECRET = x_tilake_app_secret
self.X_TILAKE_CA_SGINATURE_METHOD = x_tilake_ca_sginature_method
self.temperature = temperature
self.model = path
def generate_signature(self, method, accept, content_type, date, url_path):
"""生成签名.
:param method:
:param accept:
:param content_type:
:param date:
:param url_path:
:return:
"""
string_to_sign = (method + '\n' + accept + '\n' + content_type + '\n' +
date + '\n' + url_path)
string_to_sign = string_to_sign.encode('utf-8')
secret_key = self.X_TILAKE_APP_SECRET.encode('utf-8')
signature = hmac.new(secret_key, string_to_sign,
hashlib.sha256).digest()
return encode_base64_string(signature)
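Editor's note: the signature is a standard HMAC-SHA256 over the newline-joined method, accept, content type, date, and URL path, base64-encoded. The same computation in standalone form (all values are dummies):

```python
import base64
import hashlib
import hmac

fields = ['POST', '*/*', 'application/json',
          'Mon, 01 Jan 2024 00:00:00GMT+00:00', '/v1/chat']  # dummy fields
secret = b'my-app-secret'                                    # dummy secret
mac = hmac.new(secret, '\n'.join(fields).encode('utf-8'), hashlib.sha256)
signature = base64.b64encode(mac.digest()).decode()
```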
def generate_header(self, content_type, accept, date, signature):
"""生成请求头参数.
:param content_type:
:param accept:
:return:
"""
headers = {
'x-tilake-app-key': self.X_TILAKE_APP_KEY,
'x-tilake-ca-signature-method': self.X_TILAKE_CA_SGINATURE_METHOD,
'x-tilake-ca-timestamp': get_current_timestamp(),
'x-tilake-ca-nonce': generate_random_string(),
'x-tilake-ca-signature': signature,
'Date': date,
'Content-Type': content_type,
'Accept': accept,
}
return headers
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'yayi' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
date = get_current_time_gmt_format()
content_type = 'application/json'
accept = '*/*'
method = 'POST'
data = {
'id': '001',  # request id; no need to change
'model': self.model,
'messages': messages,
'max_new_tokens': max_out_len,  # this and the params below can be tuned per task
'temperature': self.temperature,
'presence_penalty': 0.85,
'frequency_penalty': 0.16,
'do_sample': True,
'top_p': 1.0,
'top_k': -1,
}
for _ in range(self.retry):
signature_str = self.generate_signature(method=method,
accept=accept,
content_type=content_type,
date=date,
url_path=self.url_path)
headers = self.generate_header(content_type=content_type,
accept=accept,
date=date,
signature=signature_str)
try:
response = requests.post(self.url, json=data, headers=headers)
except Exception as e:
print(e)
continue
try:
response = response.json()
except Exception as e:
print(e)
continue
print(response)
try:
return response['data']['choices'][0]['message']['content']
except Exception as e:
print(e)
continue
raise RuntimeError(f'Failed to respond in {self.retry} retries')
@@ -44,13 +44,13 @@ class ZhiPuAI(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -67,13 +67,13 @@ class ZhiPuAI(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -2,8 +2,6 @@ import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
from httpx import ProxyError
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
@@ -59,13 +57,13 @@ class ZhiPuV2AI(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -82,13 +80,13 @@ class ZhiPuV2AI(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -103,6 +101,8 @@ class ZhiPuV2AI(BaseAPIModel):
else:
messages = []
for item in input:
if not item['prompt']:
continue
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
@@ -115,11 +115,15 @@ class ZhiPuV2AI(BaseAPIModel):
data = {'model': self.model, 'messages': messages}
data.update(self.generation_kwargs)
from pprint import pprint
print('-' * 128)
pprint(data)
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
response = None
from httpx import ProxyError
try:
response = self.client.chat.completions.create(**data)
@@ -161,6 +165,8 @@ class ZhiPuV2AI(BaseAPIModel):
# msg = response['data']['choices'][0]['content']
else:
msg = response.choices[0].message.content
print('=' * 128)
print(msg)
return msg
# sensitive content, prompt overlength, network error
# or illegal prompt
@@ -120,7 +120,7 @@ class LMEvaluator:
meta: Optional[bool] = False,
infer_order: Optional[str] = 'random') -> Dict:
dup_indices = []
if type(predictions) == list:
if isinstance(predictions, list):
"""Apply to multi-model comparison."""
references = [{} for _ in range(len(predictions[0]['model_preds']))
] if references is None else references
@@ -137,7 +137,7 @@ class LMEvaluator:
if len(set(check)) == 1:
dup_indices.append(i)
elif type(predictions) == dict:
elif isinstance(predictions, dict):
"""Apply to single-model scoring."""
references = [{} for _ in range(len(predictions[0]['model_preds']))
] if references is None else references
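Editor's note: the switch from `type(x) == list` to `isinstance(x, list)` matters because an exact type check rejects list subclasses, while `isinstance` accepts them (and satisfies flake8's E721). A short demonstration:

```python
class MyPreds(list):  # hypothetical subclass, for illustration only
    pass

preds = MyPreds()
print(type(preds) == list)      # False: exact type check rejects subclasses
print(isinstance(preds, list))  # True
```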
@@ -77,7 +77,7 @@ class PromptTemplate:
label (:obj:`Hashable`): The value of the output field.
Returns:
str or PromptList: The generated in-context example.
PromptType: The generated in-context example.
"""
# Select the corresponding template
if isinstance(self.template, str) or self.prompt_type == 'meta':
@@ -114,7 +114,7 @@ class PromptTemplate:
entry (:obj:`Dict`): A piece of data containing the input field
content.
ice (str or PromptList): The generated in-context example.
ice (PromptType): The generated in-context example.
label (:obj:`Hashable`): The value of the output field.
remain_sep (:obj:`bool`): If remain sep_token
@@ -165,7 +165,7 @@ class PromptTemplate:
the :obj:`ice_token`. Defaults to ``''``.
Returns:
str or PromptList: The generated item.
PromptType: The generated item.
"""
template = None
if isinstance(self.template, str):
@@ -220,7 +220,7 @@ class PromptTemplate:
examples.
Returns:
str or PromptList: The encoded template.
PromptType: The encoded template.
"""
if isinstance(prompt_template, str):
return prompt_template
from typing import Callable, List, Optional, Type, Union
from mmengine.registry import DATASETS as MMENGINE_DATASETS
from mmengine.registry import METRICS as MMENGINE_METRICS
from mmengine.registry import MODELS as MMENGINE_MODELS
from mmengine.registry import Registry
from mmengine.registry import Registry as OriginalRegistry
class Registry(OriginalRegistry):
# override the default force behavior
def register_module(
self,
name: Optional[Union[str, List[str]]] = None,
force: bool = True,
module: Optional[Type] = None) -> Union[type, Callable]:
return super().register_module(name, force, module)
PARTITIONERS = Registry('partitioner', locations=['opencompass.partitioners'])
RUNNERS = Registry('runner', locations=['opencompass.runners'])
@@ -118,6 +118,7 @@ class DLCRunner(BaseRunner):
conda_env_name = self.aliyun_cfg['conda_env_name']
shell_cmd = (f'source {bashrc_path}; '
f'conda activate {conda_env_name}; ')
shell_cmd += f'export PYTHONPATH={pwd}:$PYTHONPATH; '
else:
# using public conda env
# users can also set `python_env_path` to their
@@ -151,6 +152,11 @@ class DLCRunner(BaseRunner):
if hf_endpoint is not None:
shell_cmd += f'export HF_ENDPOINT={hf_endpoint}; '
extra_envs = self.aliyun_cfg.get('extra_envs')
if extra_envs is not None:
for extra_env in extra_envs:
shell_cmd += f'export {extra_env}; '
shell_cmd += f'cd {pwd}; '
shell_cmd += '{task_cmd}'
@@ -161,9 +167,9 @@ class DLCRunner(BaseRunner):
f" -c {self.aliyun_cfg['dlc_config_path']}"
f" --workspace_id {self.aliyun_cfg['workspace_id']}"
' --worker_count 1'
f' --worker_cpu {max(num_gpus * 8, 32)}'
f' --worker_cpu {max(num_gpus * 8, 12)}'
f' --worker_gpu {num_gpus}'
f' --worker_memory {max(num_gpus * 128, 256)}'
f' --worker_memory {max(num_gpus * 128, 192)}'
f" --worker_image {self.aliyun_cfg['worker_image']}")
get_cmd = partial(task.get_command,
cfg_path=param_file,
@@ -185,14 +191,25 @@ class DLCRunner(BaseRunner):
time.sleep(random.randint(0, 10))
def _run_within_retry():
output = subprocess.getoutput(cmd)
match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
if match is None:
raise RuntimeError(
f'Failed to launch dlc job for {output}')
num_retry_to_start = 5
index_to_start = 0
while index_to_start < num_retry_to_start:
index_to_start += 1
output = subprocess.getoutput(cmd)
match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
if match is None:
stdout.write('Failed to get job id from output:')
stdout.write(output)
if index_to_start < num_retry_to_start:
stdout.write(f'Retry #{index_to_start} starting')
time.sleep(2)
continue
else:
job_id = match.group(1)
stdout.write(output)
break
else:
job_id = match.group(1)
stdout.write(output)
raise RuntimeError(f'Cannot get job id from {output}')
pod_create_time = None
pri_time = None
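Editor's note: launching the DLC job is now retried; the job id is parsed out of the CLI output up to five times before the runner gives up. The same pattern condensed into a helper (`launch` stands in for `subprocess.getoutput(cmd)`):

```python
import re
import time

def launch_with_retry(launch, num_retry=5,
                      pattern=r'\|\s+(dlc[0-9a-z]+)\s+\|'):
    """Retry a flaky CLI launch until its output contains a job id."""
    for attempt in range(1, num_retry + 1):
        output = launch()  # e.g. subprocess.getoutput(cmd)
        match = re.search(pattern, output)
        if match:
            return match.group(1)  # the dlc job id
        if attempt < num_retry:
            time.sleep(2)  # brief pause before the next attempt
    raise RuntimeError(f'Cannot get job id from {output}')
```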
@@ -200,7 +217,7 @@
while True:
# 1. Avoid to request dlc too frequently.
# 2. DLC job may not be ready immediately after creation.
for _ in range(5):
for _ in range(20):
time.sleep(2)
try:
job_info = json.loads(
@@ -17,7 +17,7 @@ from opencompass.utils import (LarkReporter, dataset_abbr_from_cfg,
from opencompass.utils.prompt import get_prompt_hash
METRIC_WHITELIST = ['score', 'auc_score', 'accuracy', 'humaneval_pass@1', 'rouge1', 'avg_toxicity_score', 'bleurt_diff', 'matthews_correlation', 'truth', 'f1', 'exact_match']
METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len']
METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len', 'tool_rate']
def model_abbr_from_cfg_used_in_summarizer(model):
if model.get('summarizer_abbr', None):
......
@@ -218,8 +218,9 @@ def get_dimension_results(judged_answers, references, fout, fout_flag, model):
dimension_avg_ratings = defaultdict(float)
for dimension, total_score in dimension_ratings.items():
dimension_avg_ratings[
dimension] = total_score / dimension_counts[dimension]
s = total_score / dimension_counts[dimension]
s = round(s, 2)
dimension_avg_ratings[dimension] = s
scores = {model: dimension_avg_ratings}
rows = list(scores.keys())
@@ -249,8 +250,9 @@ def get_capability_results(judged_answers,
capability_avg_ratings = defaultdict(float)
for capability, total_score in capability_ratings.items():
capability_avg_ratings[
capability] = total_score / capability_counts[capability]
s = total_score / capability_counts[capability]
s = round(s, 2)
capability_avg_ratings[capability] = s
temp_list = []
total_column_num = 2
@@ -260,11 +262,14 @@
np.mean(capability_avg_ratings[cat])
for cat in categories[category]
])
capability_avg_ratings[category + '总分'] = round(
capability_avg_ratings[category + '总分'], 2)
temp_list.append(category + '总分')
capability_avg_ratings['总分'] = 0
for temp in temp_list:
capability_avg_ratings['总分'] += capability_avg_ratings[temp]
capability_avg_ratings['总分'] /= len(temp_list)
capability_avg_ratings['总分'] = round(capability_avg_ratings['总分'], 2)
scores = {model: capability_avg_ratings}
with open(fout, 'a+', newline='') as csvfile:
@@ -365,8 +370,10 @@ class AlignmentBenchSummarizer:
print(subdir_path + ' does not exist! Please check!')
if self.judge_type == 'general':
with open(fout, 'r') as f:
x = from_csv(f)
x = from_csv(f, delimiter=',')
print(x)
print(fout)
with open(fout2, 'r') as f:
x = from_csv(f)
x = from_csv(f, delimiter=',')
print(x)
print(fout2)
@@ -229,4 +229,5 @@ class CompassArenaSummarizer:
for fout in fout_list:
with open(fout, 'r') as f:
x = from_csv(f)
print(fout)
print(x)
@@ -65,8 +65,9 @@ def get_capability_results(
capability_avg_ratings = defaultdict(float)
for capability, total_score in capability_ratings.items():
capability_avg_ratings[
capability] = total_score / capability_counts[capability]
s = total_score / capability_counts[capability]
s = round(s, 2)
capability_avg_ratings[capability] = s
columns = list(capability_avg_ratings.keys())
columns.insert(0, columns.pop(columns.index('total')))
with open(fout, 'a+', newline='') as csvfile:
@@ -142,5 +143,6 @@ class MTBenchSummarizer(CompassArenaSummarizer):
with open(fout, 'r') as f:
x = from_csv(f)
print(x)
print(fout)
elif self.judge_type == 'pair':
super().summarize()
@@ -43,9 +43,12 @@ class OpenICLInferTask(BaseTask):
the command.
"""
script_path = __file__
has_vllm = ('VLLM' in str(self.model_cfgs[0].get('type', ''))) or \
'VLLM' in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
if self.num_gpus > 0 and not has_vllm:
backend_keys = ['VLLM', 'Lmdeploy']
use_backend = any(
key in str(self.model_cfgs[0].get('type', ''))
or key in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
for key in backend_keys)
if self.num_gpus > 0 and not use_backend:
port = random.randint(12000, 32000)
command = (f'torchrun --master_port={port} '
f'--nproc_per_node {self.num_procs} '
......
@@ -120,7 +120,10 @@ class AlpacaEvalTask(BaseTask):
filename = get_infer_output_path(m_cfg, dataset_cfg,
osp.join(work_dir, 'predictions'))
output_path = osp.join(work_dir, 'results', m_cfg['abbr'])
command = f'export OPENAI_API_KEY={api_key}; alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
command = ''
if api_key is not None:
command += f'export OPENAI_API_KEY={api_key}; '
command += f'alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
return template.format(task_cmd=command)
def run(self):