Unverified commit b39f5015, authored by Fengzhe Zhou, committed by GitHub

[Sync] update taco (#1030)

parent 16f29b25
@@ -48,13 +48,13 @@ class Qwen(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -71,13 +71,13 @@ class Qwen(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> str:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -103,16 +103,26 @@ class Qwen(BaseAPIModel):
             messages = [{'role': 'user', 'content': input}]
         else:
             messages = []
-            for item in input:
-                msg = {'content': item['prompt']}
-                if item['role'] == 'HUMAN':
-                    msg['role'] = 'user'
-                elif item['role'] == 'BOT':
-                    msg['role'] = 'assistant'
-                elif item['role'] == 'SYSTEM':
-                    msg['role'] = 'system'
-                messages.append(msg)
+            msg_buffer, last_role = [], None
+            for index, item in enumerate(input):
+                if index == 0 and item['role'] == 'SYSTEM':
+                    role = 'system'
+                elif item['role'] == 'BOT':
+                    role = 'assistant'
+                else:
+                    role = 'user'
+                if role != last_role and last_role is not None:
+                    messages.append({
+                        'content': '\n'.join(msg_buffer),
+                        'role': last_role
+                    })
+                    msg_buffer = []
+                msg_buffer.append(item['prompt'])
+                last_role = role
+            messages.append({
+                'content': '\n'.join(msg_buffer),
+                'role': last_role
+            })

         data = {'messages': messages}
         data.update(self.generation_kwargs)
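The rewritten loop merges consecutive prompts that resolve to the same API role into a single message, and honours a SYSTEM entry only at position 0; many chat-completion APIs reject back-to-back messages with the same role. A minimal standalone sketch of the merging behaviour (data hypothetical):

    # Standalone sketch of the merging loop above (data hypothetical):
    items = [
        {'role': 'SYSTEM', 'prompt': 'You are a helpful assistant.'},
        {'role': 'HUMAN', 'prompt': 'Question 1'},
        {'role': 'HUMAN', 'prompt': 'Some extra context'},
        {'role': 'BOT', 'prompt': 'Answer 1'},
        {'role': 'HUMAN', 'prompt': 'Question 2'},
    ]
    messages, msg_buffer, last_role = [], [], None
    for index, item in enumerate(items):
        if index == 0 and item['role'] == 'SYSTEM':
            role = 'system'
        elif item['role'] == 'BOT':
            role = 'assistant'
        else:
            role = 'user'
        if role != last_role and last_role is not None:
            messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
            msg_buffer = []
        msg_buffer.append(item['prompt'])
        last_role = role
    messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
    # The two consecutive HUMAN prompts collapse into one user message:
    # [{'content': 'You are a helpful assistant.', 'role': 'system'},
    #  {'content': 'Question 1\nSome extra context', 'role': 'user'},
    #  {'content': 'Answer 1', 'role': 'assistant'},
    #  {'content': 'Question 2', 'role': 'user'}]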
@@ -142,6 +152,8 @@ class Qwen(BaseAPIModel):
             if response.status_code == 200:
                 try:
                     msg = response.output.text
+                    print('=' * 128)
+                    print(msg)
                     return msg
                 except KeyError:
                     print(response)
@@ -153,6 +165,8 @@ class Qwen(BaseAPIModel):
                 time.sleep(2)
                 continue
             if response.status_code == 400:
+                print('=' * 128)
+                print(response)
                 msg = 'Output data may contain inappropriate content.'
                 return msg
...
@@ -61,13 +61,13 @@ class SenseTime(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -84,13 +84,13 @@ class SenseTime(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> str:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -114,7 +114,8 @@ class SenseTime(BaseAPIModel):
                 messages.append(msg)

         data = {'messages': messages, 'model': self.model}
-        data.update(self.params)
+        if self.params is not None:
+            data.update(self.params)

         stream = data['stream']
@@ -123,10 +124,14 @@ class SenseTime(BaseAPIModel):
             self.acquire()
             max_num_retries += 1
-            raw_response = requests.request('POST',
-                                            url=self.url,
-                                            headers=self.headers,
-                                            json=data)
+            try:
+                raw_response = requests.request('POST',
+                                                url=self.url,
+                                                headers=self.headers,
+                                                json=data)
+            except Exception:
+                time.sleep(1)
+                continue
             requests_id = raw_response.headers['X-Request-Id']  # noqa
             self.release()
...
+import copy
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union
@@ -70,11 +71,10 @@ class TurboMindModel(BaseModel):
         self.gen_config = gen_config
         self.end_str = end_str

-    def generate(
-        self,
-        inputs: List[str],
-        max_out_len: int = 512,
-    ) -> List[str]:
+    def generate(self,
+                 inputs: List[str],
+                 max_out_len: int = 512,
+                 **kwargs) -> List[str]:
         """Generate results given a list of inputs.

         Args:
@@ -93,6 +93,15 @@ class TurboMindModel(BaseModel):
             inputs[i:i + batch_size] for i in range(0, len(inputs), batch_size)
         ]

+        gen_config = copy.deepcopy(self.gen_config)
+        if 'do_sample' in kwargs:
+            if kwargs['do_sample']:
+                gen_config.top_k = 1000
+                gen_config.temperature = kwargs.get('temperature', 1)
+            else:
+                gen_config.top_k = 1
+                gen_config.temperature = 0.01
+
         results = []
         for batch_input in batch_inputs:
             with ThreadPoolExecutor() as executor:
@@ -103,7 +112,7 @@ class TurboMindModel(BaseModel):
                         self.generator_ids[:len(batch_input)],
                         batch_input,
                         [max_out_len] * len(batch_input),
-                        [self.gen_config] * len(batch_input),
+                        [gen_config] * len(batch_input),
                         [self.end_str] * len(batch_input),
                     ))
             results += _results
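Deep-copying self.gen_config keeps the model's base config untouched across calls, while the do_sample kwarg maps onto lmdeploy-style knobs: sampling widens top_k and applies the requested temperature, and do_sample=False approximates greedy decoding. A standalone sketch with a hypothetical stand-in for the config object:

    # Standalone sketch of the mapping above, using a hypothetical stand-in
    # for lmdeploy's generation config:
    import copy
    from dataclasses import dataclass

    @dataclass
    class FakeGenConfig:  # hypothetical; mirrors only the fields touched above
        top_k: int = 40
        temperature: float = 0.8

    base = FakeGenConfig()
    gen_config = copy.deepcopy(base)   # per-call copy; `base` stays untouched
    do_sample, temperature = False, 0.7
    if do_sample:
        gen_config.top_k = 1000        # effectively unrestricted sampling
        gen_config.temperature = temperature
    else:
        gen_config.top_k = 1           # approximates greedy decoding
        gen_config.temperature = 0.01
    print(gen_config, base)  # base keeps top_k=40, temperature=0.8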
@@ -123,14 +132,14 @@ class TurboMindModel(BaseModel):
     def _generate(self,
                   generator,
                   session_id,
-                  prompt: str or PromptList,
+                  prompt: PromptType,
                   max_out_len: int,
                   gen_config=None,
                   end_str: Optional[str] = None) -> str:
         """Generate results given a list of inputs.

         Args:
-            prompt (str or PromptList): A string or PromptDict.
+            prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -189,3 +198,22 @@ class TurboMindModel(BaseModel):
             results.append(res)
         results = np.concatenate(results)
         return results
+
+    def get_loglikelihood(
+            self,
+            inputs: List[str],
+            conts: List[str],
+            mask_length: Optional[List[int]] = None) -> List[float]:
+        assert isinstance(
+            inputs, List), f'List(str) is expected, but got {type(inputs)}'
+        results = []
+        for text, cont in zip(inputs, conts):
+            input_ids = self.tokenizer.encode(text)
+            res = self.generators[0].get_ppl(input_ids)
+            logit_sum = res * len(input_ids)
+            input_ids = self.tokenizer.encode(text.replace(cont, ''))
+            res = self.generators[0].get_ppl(input_ids)
+            logit_part = res * len(input_ids)
+            results.append(-(logit_sum - logit_part))
+        results = np.concatenate(results)
+        return results
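This assumes get_ppl returns the mean negative log-likelihood per token, so multiplying by the token count recovers a sequence total; subtracting the total for the text with the continuation stripped isolates the continuation, and the final negation turns it back into a log-likelihood. Note that text.replace(cont, '') removes every occurrence of cont, which matches "drop the trailing continuation" only when cont appears once. A worked example of the arithmetic (numbers made up):

    # Worked example of the arithmetic above (numbers are made up), assuming
    # get_ppl returns the mean negative log-likelihood per token:
    nll_full, len_full = 2.0, 10   # full text: total NLL = 2.0 * 10 = 20.0
    nll_part, len_part = 1.8, 7    # without the continuation: 1.8 * 7 = 12.6
    logit_sum = nll_full * len_full
    logit_part = nll_part * len_part
    loglikelihood = -(logit_sum - logit_part)  # -(20.0 - 12.6) = -7.4
    print(loglikelihood)  # log-likelihood of the continuation tokens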
@@ -60,14 +60,14 @@ class TurboMindAPIModel(BaseModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
         temperature: float = 1.0,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -102,12 +102,12 @@ class TurboMindAPIModel(BaseModel):
         """
         return self.token_bucket.get_token()

-    def _generate(self, prompt: str or PromptList, max_out_len: int,
+    def _generate(self, prompt: PromptType, max_out_len: int,
                   temperature: float, end_str: str) -> str:
         """Generate results given a list of inputs.

         Args:
-            prompt (str or PromptList): A string or PromptDict.
+            prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
...
@@ -58,14 +58,14 @@ class TurboMindTisModel(BaseModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
         temperature: float = 1.0,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -96,12 +96,12 @@ class TurboMindTisModel(BaseModel):
         """
         return self.token_bucket.get_token()

-    def _generate(self, prompt: str or PromptList, max_out_len: int,
+    def _generate(self, prompt: PromptType, max_out_len: int,
                   temperature: float) -> str:
         """Generate results given a list of inputs.

         Args:
-            prompt (str or PromptList): A string or PromptDict.
+            prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
...
import hashlib
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union

import requests

from opencompass.utils.prompt import PromptList

from .base_api import BaseAPIModel

PromptType = Union[PromptList, str]


def get_sign(appkey, udid, timestamp, secret):
    original_str = f'{appkey}{udid}{timestamp}{secret}'
    sign = ''
    try:
        md = hashlib.sha256()
        md.update(original_str.encode('utf-8'))
        bytes_result = md.digest()
        for byte in bytes_result:
            hex_value = format(byte, '02X')
            sign += hex_value.upper()
    except Exception as e:
        print(e)
    return sign
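get_sign is simply the upper-case hex SHA-256 digest of the four concatenated fields; the manual byte loop is equivalent to hexdigest().upper(). A usage sketch (all values hypothetical):

    # Usage sketch (values hypothetical); the loop above is equivalent to
    # hashlib.sha256(...).hexdigest().upper():
    import hashlib
    appkey, udid, timestamp, secret = 'my-appkey', 'my-udid', '1700000000000', 'my-secret'
    expected = hashlib.sha256(
        f'{appkey}{udid}{timestamp}{secret}'.encode('utf-8')).hexdigest().upper()
    assert get_sign(appkey, udid, timestamp, secret) == expected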
class UniGPT(BaseAPIModel):

    def __init__(
        self,
        path: str,
        appkey: str,
        secret: str,
        url: str,
        query_per_second: int = 2,
        max_seq_len: int = 2048,
        meta_template: Optional[Dict] = None,
        retry: int = 2,
        temperature: float = 0.2,
    ):  # noqa E125
        super().__init__(
            path=path,
            max_seq_len=max_seq_len,
            query_per_second=query_per_second,
            meta_template=meta_template,
            retry=retry,
        )
        self.appkey = appkey
        self.secret = secret
        self.udid = str(uuid.uuid1())
        self.url = url
        self.model = path
        self.temperature = temperature

    def generate(self,
                 inputs: List[PromptType],
                 max_out_len: int = 512) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptDicts.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            List[str]: A list of generated strings.
        """
        with ThreadPoolExecutor() as executor:
            results = list(
                executor.map(self._generate, inputs,
                             [max_out_len] * len(inputs)))
        self.flush()
        return results

    def _generate(self, input: PromptType, max_out_len: int = 512) -> str:
        """Generate results given an input.

        Args:
            inputs (PromptType): A string or PromptDict.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            str: The generated string.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            for item in input:
                msg = {'content': item['prompt']}
                if item['role'] == 'HUMAN':
                    msg['role'] = 'user'
                elif item['role'] == 'BOT':
                    msg['role'] = 'assistant'
                elif item['role'] == 'SYSTEM':
                    msg['role'] = 'system'
                messages.append(msg)

        data = {
            'model': self.path,
            'temperature': self.temperature,
            'messages': messages,
            'max_tokens': max_out_len,
        }
        timestamp = str(int(time.time()) * 1000)
        headers = {
            'appkey': self.appkey,
            'sign': get_sign(self.appkey, self.udid, timestamp, self.secret),
            'stream': 'false',
            'timestamp': timestamp,
            'udid': self.udid,
            'censor': 'none',
        }

        for _ in range(self.retry):
            try:
                response = requests.post(self.url, json=data, headers=headers)
            except Exception as e:
                print(e)
                continue
            if response is None or response.status_code != 200:
                code = response.status_code if response else -1
                print(f'request err, status_code: {code}')
                time.sleep(10)
                continue
            try:
                response = response.json()
            except Exception as e:
                print(e)
                continue
            print(response)
            if response.get('errorCode') == '8500502':
                return 'context_length_exceeded'
            return response['result']['choices'][0]['message']['content']
        raise RuntimeError(f'Failed to respond in {self.retry} retries')
...
@@ -98,13 +98,13 @@ class XunFei(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -121,13 +121,13 @@ class XunFei(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
...
import base64
import hashlib
import hmac
import random
import string
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict, List, Optional, Union

import requests

from opencompass.utils.prompt import PromptList

from .base_api import BaseAPIModel

PromptType = Union[PromptList, str]


def generate_random_string(length=16):
    """Generate a random string.

    :param length: length of the string, 16 by default
    :return: the random string
    """
    letters = string.ascii_letters + string.digits
    rand_str = ''.join(random.choice(letters) for i in range(length))
    return rand_str


def get_current_time(format='%Y-%m-%d %H:%M:%S'):
    """Get the current time.

    :param format: time format, '%Y-%m-%d %H:%M:%S' by default
    :return: the current time as a string
    """
    now = datetime.now()
    time_str = now.strftime(format)
    return time_str


def get_current_timestamp():
    """Get the current timestamp in milliseconds.

    :return: the timestamp as a string
    """
    timestamp_str = int(round(time.time() * 1000))
    return str(timestamp_str)


def encode_base64_string(s):
    """Base64-encode a byte string.

    :param s: the bytes to encode
    :return: the encoded string
    """
    encoded = base64.b64encode(s).decode()
    return encoded


def get_current_time_gmt_format():
    """Get the current time as a GMT-formatted string.

    :return: the formatted time string
    """
    GMT_FORMAT = '%a, %d %b %Y %H:%M:%SGMT+00:00'
    now = datetime.now()
    time_str = now.strftime(GMT_FORMAT)
    return time_str
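For reference, what these helpers produce; note that get_current_time_gmt_format formats datetime.now(), i.e. local time, and the format string appends a literal 'GMT+00:00' with no space before it. Outputs below are illustrative:

    # Illustrative outputs (actual values depend on when and where this runs):
    print(generate_random_string(8))      # e.g. 'aZ3kQ9xP'
    print(get_current_timestamp())        # e.g. '1700000000123' (milliseconds)
    print(get_current_time_gmt_format())  # e.g. 'Mon, 20 Nov 2023 10:20:30GMT+00:00'
    # The GMT helper formats local time; the 'GMT+00:00' suffix is a literal
    # baked into the format string, not a computed offset.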
class Yayi(BaseAPIModel):
    """Model wrapper around Yayi.

    Args:
        path (str): The name of the Yayi model.
        key (str): Authorization key.
        query_per_second (int): The maximum queries allowed per second
            between two consecutive calls of the API. Defaults to 1.
        max_seq_len (int): Unused here.
        meta_template (Dict, optional): The model's meta prompt
            template if needed, in case the requirement of injecting or
            wrapping of any meta instructions.
        retry (int): Number of retries if the API call fails. Defaults to 2.
    """
    def __init__(
        self,
        path: str,
        url: str,
        url_path: str,
        x_tilake_app_key: str,
        x_tilake_app_secret: str,
        x_tilake_ca_sginature_method: str,
        query_per_second: int = 2,
        max_seq_len: int = 2048,
        meta_template: Optional[Dict] = None,
        retry: int = 2,
        temperature: float = 0.4,
    ):
        super().__init__(
            path=path,
            max_seq_len=max_seq_len,
            query_per_second=query_per_second,
            meta_template=meta_template,
            retry=retry,
        )
        self.url = url
        self.url_path = url_path
        self.X_TILAKE_APP_KEY = x_tilake_app_key
        self.X_TILAKE_APP_SECRET = x_tilake_app_secret
        self.X_TILAKE_CA_SGINATURE_METHOD = x_tilake_ca_sginature_method
        self.temperature = temperature
        self.model = path

    def generate_signature(self, method, accept, content_type, date, url_path):
        """Generate a request signature.

        :param method: HTTP method
        :param accept: Accept header value
        :param content_type: Content-Type header value
        :param date: date string
        :param url_path: request path
        :return: the Base64-encoded HMAC-SHA256 signature
        """
        string_to_sign = (method + '\n' + accept + '\n' + content_type + '\n' +
                          date + '\n' + url_path)
        string_to_sign = string_to_sign.encode('utf-8')
        secret_key = self.X_TILAKE_APP_SECRET.encode('utf-8')
        signature = hmac.new(secret_key, string_to_sign,
                             hashlib.sha256).digest()
        return encode_base64_string(signature)
    def generate_header(self, content_type, accept, date, signature):
        """Generate the request headers.

        :param content_type: Content-Type header value
        :param accept: Accept header value
        :param date: date string
        :param signature: the signature from generate_signature
        :return: a dict of request headers
        """
        headers = {
            'x-tilake-app-key': self.X_TILAKE_APP_KEY,
            'x-tilake-ca-signature-method': self.X_TILAKE_CA_SGINATURE_METHOD,
            'x-tilake-ca-timestamp': get_current_timestamp(),
            'x-tilake-ca-nonce': generate_random_string(),
            'x-tilake-ca-signature': signature,
            'Date': date,
            'Content-Type': content_type,
            'Accept': accept,
        }
        return headers

    def generate(
        self,
        inputs: List[PromptType],
        max_out_len: int = 512,
    ) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptDicts.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            List[str]: A list of generated strings.
        """
        with ThreadPoolExecutor() as executor:
            results = list(
                executor.map(self._generate, inputs,
                             [max_out_len] * len(inputs)))
        self.flush()
        return results

    def _generate(
        self,
        input: PromptType,
        max_out_len: int = 512,
    ) -> str:
        """Generate results given an input.

        Args:
            inputs (PromptType): A string or PromptDict.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            str: The generated string.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            msg_buffer, last_role = [], None
            for item in input:
                item['role'] = 'yayi' if item['role'] == 'BOT' else 'user'
                if item['role'] != last_role and last_role is not None:
                    messages.append({
                        'content': '\n'.join(msg_buffer),
                        'role': last_role
                    })
                    msg_buffer = []
                msg_buffer.append(item['prompt'])
                last_role = item['role']
            messages.append({
                'content': '\n'.join(msg_buffer),
                'role': last_role
            })

        date = get_current_time_gmt_format()
        content_type = 'application/json'
        accept = '*/*'
        method = 'POST'
        data = {
            'id': '001',  # request id; no need to change
            'model': self.model,
            'messages': messages,
            'max_new_tokens': max_out_len,  # this and the params below may be tuned per task
            'temperature': self.temperature,
            'presence_penalty': 0.85,
            'frequency_penalty': 0.16,
            'do_sample': True,
            'top_p': 1.0,
            'top_k': -1,
        }

        for _ in range(self.retry):
            signature_str = self.generate_signature(method=method,
                                                    accept=accept,
                                                    content_type=content_type,
                                                    date=date,
                                                    url_path=self.url_path)
            headers = self.generate_header(content_type=content_type,
                                           accept=accept,
                                           date=date,
                                           signature=signature_str)
            try:
                response = requests.post(self.url, json=data, headers=headers)
            except Exception as e:
                print(e)
                continue
            try:
                response = response.json()
            except Exception as e:
                print(e)
                continue
            print(response)
            try:
                return response['data']['choices'][0]['message']['content']
            except Exception as e:
                print(e)
                continue
        raise RuntimeError(f'Failed to respond in {self.retry} retries')
...
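The Yayi request is authenticated by signing 'METHOD\nAccept\nContent-Type\nDate\nurl_path' with HMAC-SHA256 under the app secret and sending the Base64 digest in the x-tilake-ca-signature header. A standalone sketch of the scheme (all values hypothetical):

    # Standalone sketch of the signing scheme (values hypothetical):
    import base64
    import hashlib
    import hmac

    secret = b'my-app-secret'
    string_to_sign = '\n'.join([
        'POST', '*/*', 'application/json',
        'Mon, 20 Nov 2023 10:20:30GMT+00:00', '/v1/chat'])
    digest = hmac.new(secret, string_to_sign.encode('utf-8'),
                      hashlib.sha256).digest()
    print(base64.b64encode(digest).decode())  # goes into x-tilake-ca-signature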
@@ -44,13 +44,13 @@ class ZhiPuAI(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -67,13 +67,13 @@ class ZhiPuAI(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> str:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
...
@@ -2,8 +2,6 @@ import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union

-from httpx import ProxyError
-
 from opencompass.utils.prompt import PromptList

 from .base_api import BaseAPIModel
@@ -59,13 +57,13 @@ class ZhiPuV2AI(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -82,13 +80,13 @@ class ZhiPuV2AI(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> str:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -103,6 +101,8 @@ class ZhiPuV2AI(BaseAPIModel):
         else:
             messages = []
             for item in input:
+                if not item['prompt']:
+                    continue
                 msg = {'content': item['prompt']}
                 if item['role'] == 'HUMAN':
                     msg['role'] = 'user'
@@ -115,11 +115,15 @@ class ZhiPuV2AI(BaseAPIModel):

         data = {'model': self.model, 'messages': messages}
         data.update(self.generation_kwargs)

+        from pprint import pprint
+        print('-' * 128)
+        pprint(data)
+
         max_num_retries = 0
         while max_num_retries < self.retry:
             self.acquire()

             response = None
+            from httpx import ProxyError

             try:
                 response = self.client.chat.completions.create(**data)
@@ -161,6 +165,8 @@ class ZhiPuV2AI(BaseAPIModel):
                 # msg = response['data']['choices'][0]['content']
                 else:
                     msg = response.choices[0].message.content
+                    print('=' * 128)
+                    print(msg)
                 return msg
             # sensitive content, prompt overlength, network error
             # or illegal prompt
...
@@ -120,7 +120,7 @@ class LMEvaluator:
                   meta: Optional[bool] = False,
                   infer_order: Optional[str] = 'random') -> Dict:
         dup_indices = []
-        if type(predictions) == list:
+        if isinstance(predictions, list):
             """Apply to multi-model comparison."""
             references = [{} for _ in range(len(predictions[0]['model_preds']))
                           ] if references is None else references
@@ -137,7 +137,7 @@ class LMEvaluator:
             if len(set(check)) == 1:
                 dup_indices.append(i)

-        elif type(predictions) == dict:
+        elif isinstance(predictions, dict):
             """Apply to single-model scoring."""
             references = [{} for _ in range(len(predictions[0]['model_preds']))
                           ] if references is None else references
...
@@ -77,7 +77,7 @@ class PromptTemplate:
             label (:obj:`Hashable`): The value of the output field.

         Returns:
-            str or PromptList: The generated in-context example.
+            PromptType: The generated in-context example.
         """
         # Select the corresponding template
         if isinstance(self.template, str) or self.prompt_type == 'meta':
@@ -114,7 +114,7 @@ class PromptTemplate:
             entry (:obj:`Dict`): A piece of data containing the input field
                 content.
-            ice (str or PromptList): The generated in-context example.
+            ice (PromptType): The generated in-context example.
             label (:obj:`Hashable`): The value of the output field.
             remain_sep (:obj:`bool`): If remain sep_token
@@ -165,7 +165,7 @@ class PromptTemplate:
                 the :obj:`ice_token`. Defaults to ``''``.

         Returns:
-            str or PromptList: The generated item.
+            PromptType: The generated item.
         """
         template = None
         if isinstance(self.template, str):
@@ -220,7 +220,7 @@ class PromptTemplate:
             examples.

         Returns:
-            str or PromptList: The encoded template.
+            PromptType: The encoded template.
         """
         if isinstance(prompt_template, str):
             return prompt_template
...
+from typing import Callable, List, Optional, Type, Union
+
 from mmengine.registry import DATASETS as MMENGINE_DATASETS
 from mmengine.registry import METRICS as MMENGINE_METRICS
 from mmengine.registry import MODELS as MMENGINE_MODELS
-from mmengine.registry import Registry
+from mmengine.registry import Registry as OriginalRegistry
+
+
+class Registry(OriginalRegistry):
+
+    # override the default force behavior
+    def register_module(
+            self,
+            name: Optional[Union[str, List[str]]] = None,
+            force: bool = True,
+            module: Optional[Type] = None) -> Union[type, Callable]:
+        return super().register_module(name, force, module)
+

 PARTITIONERS = Registry('partitioner', locations=['opencompass.partitioners'])
 RUNNERS = Registry('runner', locations=['opencompass.runners'])
...
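Defaulting force=True means a second register_module call under an existing name silently replaces the entry instead of raising, which avoids duplicate-registration errors when configs import the same modules more than once. A minimal sketch (components hypothetical):

    # Minimal sketch of the overridden default (components hypothetical):
    MODELS = Registry('model', locations=['opencompass.models'])

    @MODELS.register_module()
    class MyModel:
        pass

    @MODELS.register_module(name='MyModel')  # would raise KeyError with force=False;
    class MyModelV2:                         # here it silently replaces the entry
        pass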
@@ -118,6 +118,7 @@ class DLCRunner(BaseRunner):
             conda_env_name = self.aliyun_cfg['conda_env_name']
             shell_cmd = (f'source {bashrc_path}; '
                          f'conda activate {conda_env_name}; ')
+            shell_cmd += f'export PYTHONPATH={pwd}:$PYTHONPATH; '
         else:
             # using public conda env
             # users can also set `python_env_path` to their
@@ -151,6 +152,11 @@ class DLCRunner(BaseRunner):
         if hf_endpoint is not None:
             shell_cmd += f'export HF_ENDPOINT={hf_endpoint}; '

+        extra_envs = self.aliyun_cfg.get('extra_envs')
+        if extra_envs is not None:
+            for extra_env in extra_envs:
+                shell_cmd += f'export {extra_env}; '
+
         shell_cmd += f'cd {pwd}; '
         shell_cmd += '{task_cmd}'
@@ -161,9 +167,9 @@ class DLCRunner(BaseRunner):
                    f" -c {self.aliyun_cfg['dlc_config_path']}"
                    f" --workspace_id {self.aliyun_cfg['workspace_id']}"
                    ' --worker_count 1'
-                   f' --worker_cpu {max(num_gpus * 8, 32)}'
+                   f' --worker_cpu {max(num_gpus * 8, 12)}'
                    f' --worker_gpu {num_gpus}'
-                   f' --worker_memory {max(num_gpus * 128, 256)}'
+                   f' --worker_memory {max(num_gpus * 128, 192)}'
                    f" --worker_image {self.aliyun_cfg['worker_image']}")
         get_cmd = partial(task.get_command,
                           cfg_path=param_file,
@@ -185,14 +191,25 @@ class DLCRunner(BaseRunner):
             time.sleep(random.randint(0, 10))

         def _run_within_retry():
-            output = subprocess.getoutput(cmd)
-            match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
-            if match is None:
-                raise RuntimeError(
-                    f'Failed to launch dlc job for {output}')
-            else:
-                job_id = match.group(1)
-                stdout.write(output)
+            num_retry_to_start = 5
+            index_to_start = 0
+            while index_to_start < num_retry_to_start:
+                index_to_start += 1
+                output = subprocess.getoutput(cmd)
+                match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
+                if match is None:
+                    stdout.write('Failed to get job id from output:')
+                    stdout.write(output)
+                    if index_to_start < num_retry_to_start:
+                        stdout.write(f'Retry #{index_to_start} starting')
+                        time.sleep(2)
+                    continue
+                else:
+                    job_id = match.group(1)
+                    stdout.write(output)
+                    break
+            else:
+                raise RuntimeError(f'Cannot get job id from {output}')

             pod_create_time = None
             pri_time = None
@@ -200,7 +217,7 @@ class DLCRunner(BaseRunner):
             while True:
                 # 1. Avoid to request dlc too frequently.
                 # 2. DLC job may not be ready immediately after creation.
-                for _ in range(5):
+                for _ in range(20):
                     time.sleep(2)
                     try:
                         job_info = json.loads(
...
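The dlc CLI prints the submitted job as an ASCII table, and the runner scrapes the job id out of it with a regex, now retrying submission up to five times; the while/else raises only when every attempt fails. An illustration of the scrape (sample output made up):

    # Illustration of the job-id scrape above (sample output is made up):
    import re

    output = '''
    | dlc1a2b3c4d5e6f |  queued  |
    '''
    match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
    print(match.group(1))  # -> 'dlc1a2b3c4d5e6f'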
@@ -17,7 +17,7 @@ from opencompass.utils import (LarkReporter, dataset_abbr_from_cfg,
 from opencompass.utils.prompt import get_prompt_hash

 METRIC_WHITELIST = ['score', 'auc_score', 'accuracy', 'humaneval_pass@1', 'rouge1', 'avg_toxicity_score', 'bleurt_diff', 'matthews_correlation', 'truth', 'f1', 'exact_match']
-METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len']
+METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len', 'tool_rate']

 def model_abbr_from_cfg_used_in_summarizer(model):
     if model.get('summarizer_abbr', None):
...
@@ -218,8 +218,9 @@ def get_dimension_results(judged_answers, references, fout, fout_flag, model):
     dimension_avg_ratings = defaultdict(float)
     for dimension, total_score in dimension_ratings.items():
-        dimension_avg_ratings[
-            dimension] = total_score / dimension_counts[dimension]
+        s = total_score / dimension_counts[dimension]
+        s = round(s, 2)
+        dimension_avg_ratings[dimension] = s

     scores = {model: dimension_avg_ratings}
     rows = list(scores.keys())
@@ -249,8 +250,9 @@ def get_capability_results(judged_answers,
     capability_avg_ratings = defaultdict(float)
     for capability, total_score in capability_ratings.items():
-        capability_avg_ratings[
-            capability] = total_score / capability_counts[capability]
+        s = total_score / capability_counts[capability]
+        s = round(s, 2)
+        capability_avg_ratings[capability] = s

     temp_list = []
     total_column_num = 2
@@ -260,11 +262,14 @@ def get_capability_results(judged_answers,
             np.mean(capability_avg_ratings[cat])
             for cat in categories[category]
         ])
+        capability_avg_ratings[category + '总分'] = round(
+            capability_avg_ratings[category + '总分'], 2)
         temp_list.append(category + '总分')
     capability_avg_ratings['总分'] = 0
     for temp in temp_list:
         capability_avg_ratings['总分'] += capability_avg_ratings[temp]
     capability_avg_ratings['总分'] /= len(temp_list)
+    capability_avg_ratings['总分'] = round(capability_avg_ratings['总分'], 2)

     scores = {model: capability_avg_ratings}
     with open(fout, 'a+', newline='') as csvfile:
@@ -365,8 +370,10 @@ class AlignmentBenchSummarizer:
                 print(subdir_path + ' is not exist! please check!')
         if self.judge_type == 'general':
             with open(fout, 'r') as f:
-                x = from_csv(f)
+                x = from_csv(f, delimiter=',')
             print(x)
+            print(fout)
         with open(fout2, 'r') as f:
-            x = from_csv(f)
+            x = from_csv(f, delimiter=',')
         print(x)
+        print(fout2)
...
@@ -229,4 +229,5 @@ class CompassArenaSummarizer:
         for fout in fout_list:
             with open(fout, 'r') as f:
                 x = from_csv(f)
+            print(fout)
             print(x)
...
@@ -65,8 +65,9 @@ def get_capability_results(
     capability_avg_ratings = defaultdict(float)
     for capability, total_score in capability_ratings.items():
-        capability_avg_ratings[
-            capability] = total_score / capability_counts[capability]
+        s = total_score / capability_counts[capability]
+        s = round(s, 2)
+        capability_avg_ratings[capability] = s

     columns = list(capability_avg_ratings.keys())
     columns.insert(0, columns.pop(columns.index('total')))
     with open(fout, 'a+', newline='') as csvfile:
@@ -142,5 +143,6 @@ class MTBenchSummarizer(CompassArenaSummarizer):
             with open(fout, 'r') as f:
                 x = from_csv(f)
             print(x)
+            print(fout)
         elif self.judge_type == 'pair':
             super().summarize()
...
@@ -43,9 +43,12 @@ class OpenICLInferTask(BaseTask):
             the command.
         """
         script_path = __file__
-        has_vllm = ('VLLM' in str(self.model_cfgs[0].get('type', ''))) or \
-            'VLLM' in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
-        if self.num_gpus > 0 and not has_vllm:
+        backend_keys = ['VLLM', 'Lmdeploy']
+        use_backend = any(
+            key in str(self.model_cfgs[0].get('type', ''))
+            or key in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
+            for key in backend_keys)
+        if self.num_gpus > 0 and not use_backend:
             port = random.randint(12000, 32000)
             command = (f'torchrun --master_port={port} '
                        f'--nproc_per_node {self.num_procs} '
...
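The check now treats any model whose type (or nested llm.type) mentions VLLM or Lmdeploy as a backend that manages its own parallelism and therefore should not be launched through torchrun. A minimal sketch with a hypothetical config:

    # Minimal sketch of the backend check (config is hypothetical):
    backend_keys = ['VLLM', 'Lmdeploy']
    model_cfg = {'type': 'opencompass.models.VLLM', 'llm': {}}
    use_backend = any(
        key in str(model_cfg.get('type', ''))
        or key in str(model_cfg.get('llm', {}).get('type', ''))
        for key in backend_keys)
    print(use_backend)  # True -> launch directly instead of via torchrun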
@@ -120,7 +120,10 @@ class AlpacaEvalTask(BaseTask):
             filename = get_infer_output_path(m_cfg, dataset_cfg,
                                              osp.join(work_dir, 'predictions'))
             output_path = osp.join(work_dir, 'results', m_cfg['abbr'])
-            command = f'export OPENAI_API_KEY={api_key}; alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
+            command = ''
+            if api_key is not None:
+                command += f'export OPENAI_API_KEY={api_key}; '
+            command += f'alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
             return template.format(task_cmd=command)

     def run(self):
...
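With the guard in place, the export prefix is emitted only when an api_key is configured; otherwise alpaca_eval inherits whatever OPENAI_API_KEY is already in the environment. A sketch of the composed command (values hypothetical):

    # Sketch of the composed command (values hypothetical):
    api_key = None
    filename, alpaca_cfg, output_path = 'preds.json', 'cfg.yaml', 'results/model'
    command = ''
    if api_key is not None:
        command += f'export OPENAI_API_KEY={api_key}; '
    command += (f'alpaca_eval --model_outputs {filename} '
                f'--annotators_config {alpaca_cfg} --output_path {output_path}')
    print(command)  # no export prefix when api_key is None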