Unverified Commit b39f5015 authored by Fengzhe Zhou, committed by GitHub

[Sync] update taco (#1030)

parent 16f29b25
@@ -48,13 +48,13 @@ class Qwen(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -71,13 +71,13 @@ class Qwen(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -103,16 +103,26 @@ class Qwen(BaseAPIModel):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
for item in input:
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
msg_buffer, last_role = [], None
for index, item in enumerate(input):
if index == 0 and item['role'] == 'SYSTEM':
role = 'system'
elif item['role'] == 'BOT':
msg['role'] = 'assistant'
elif item['role'] == 'SYSTEM':
msg['role'] = 'system'
messages.append(msg)
role = 'assistant'
else:
role = 'user'
if role != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = role
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
data = {'messages': messages}
data.update(self.generation_kwargs)
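Editor's note: the new buffering loop above collapses consecutive same-role turns into a single message, which chat endpoints typically expect to alternate; only a leading SYSTEM item keeps the system role. A minimal standalone sketch of the same logic (function name and sample turns are illustrative):

```python
# Sketch of the role-merging loop above; merge_messages is an
# illustrative name, not part of the actual class.
def merge_messages(items):
    messages, msg_buffer, last_role = [], [], None
    for index, item in enumerate(items):
        if index == 0 and item['role'] == 'SYSTEM':
            role = 'system'
        elif item['role'] == 'BOT':
            role = 'assistant'
        else:
            role = 'user'
        if role != last_role and last_role is not None:
            # Flush the previous run of same-role prompts as one message.
            messages.append({'content': '\n'.join(msg_buffer),
                             'role': last_role})
            msg_buffer = []
        msg_buffer.append(item['prompt'])
        last_role = role
    messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
    return messages

print(merge_messages([{'role': 'HUMAN', 'prompt': 'Hello'},
                      {'role': 'HUMAN', 'prompt': 'Ping?'},
                      {'role': 'BOT', 'prompt': 'Pong.'}]))
# [{'content': 'Hello\nPing?', 'role': 'user'},
#  {'content': 'Pong.', 'role': 'assistant'}]
```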
@@ -142,6 +152,8 @@ class Qwen(BaseAPIModel):
if response.status_code == 200:
try:
msg = response.output.text
print('=' * 128)
print(msg)
return msg
except KeyError:
print(response)
@@ -153,6 +165,8 @@ class Qwen(BaseAPIModel):
time.sleep(2)
continue
if response.status_code == 400:
print('=' * 128)
print(response)
msg = 'Output data may contain inappropriate content.'
return msg
@@ -61,13 +61,13 @@ class SenseTime(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -84,13 +84,13 @@ class SenseTime(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -114,7 +114,8 @@ class SenseTime(BaseAPIModel):
messages.append(msg)
data = {'messages': messages, 'model': self.model}
data.update(self.params)
if self.params is not None:
data.update(self.params)
stream = data['stream']
@@ -123,10 +124,14 @@ class SenseTime(BaseAPIModel):
self.acquire()
max_num_retries += 1
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception:
time.sleep(1)
continue
requests_id = raw_response.headers['X-Request-Id'] # noqa
self.release()
import copy
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
@@ -70,11 +71,10 @@ class TurboMindModel(BaseModel):
self.gen_config = gen_config
self.end_str = end_str
def generate(
self,
inputs: List[str],
max_out_len: int = 512,
) -> List[str]:
def generate(self,
inputs: List[str],
max_out_len: int = 512,
**kwargs) -> List[str]:
"""Generate results given a list of inputs.
Args:
@@ -93,6 +93,15 @@ class TurboMindModel(BaseModel):
inputs[i:i + batch_size] for i in range(0, len(inputs), batch_size)
]
gen_config = copy.deepcopy(self.gen_config)
if 'do_sample' in kwargs:
if kwargs['do_sample']:
gen_config.top_k = 1000
gen_config.temperature = kwargs.get('temperature', 1)
else:
gen_config.top_k = 1
gen_config.temperature = 0.01
results = []
for batch_input in batch_inputs:
with ThreadPoolExecutor() as executor:
@@ -103,7 +112,7 @@ class TurboMindModel(BaseModel):
self.generator_ids[:len(batch_input)],
batch_input,
[max_out_len] * len(batch_input),
[self.gen_config] * len(batch_input),
[gen_config] * len(batch_input),
[self.end_str] * len(batch_input),
))
results += _results
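Editor's note: the `do_sample` branch translates a HuggingFace-style sampling flag into TurboMind generation settings: sampling widens `top_k` to 1000 and takes the caller's `temperature`, while `do_sample=False` approximates greedy decoding with `top_k=1` and a near-zero temperature. A rough sketch of the mapping, with a plain dataclass standing in for lmdeploy's real config object:

```python
import copy
from dataclasses import dataclass

@dataclass
class GenConfig:  # stand-in for lmdeploy's generation config
    top_k: int = 40
    temperature: float = 0.8

def resolve_gen_config(base: GenConfig, **kwargs) -> GenConfig:
    gen_config = copy.deepcopy(base)  # leave the shared default untouched
    if 'do_sample' in kwargs:
        if kwargs['do_sample']:
            gen_config.top_k = 1000
            gen_config.temperature = kwargs.get('temperature', 1)
        else:
            gen_config.top_k = 1  # effectively greedy decoding
            gen_config.temperature = 0.01
    return gen_config

print(resolve_gen_config(GenConfig(), do_sample=False))
# GenConfig(top_k=1, temperature=0.01)
```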
@@ -123,14 +132,14 @@ class TurboMindModel(BaseModel):
def _generate(self,
generator,
session_id,
prompt: str or PromptList,
prompt: PromptType,
max_out_len: int,
gen_config=None,
end_str: Optional[str] = None) -> str:
"""Generate results given a list of inputs.
Args:
prompt (str or PromptList): A string or PromptDict.
prompt (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -189,3 +198,22 @@ class TurboMindModel(BaseModel):
results.append(res)
results = np.concatenate(results)
return results
def get_loglikelihood(
self,
inputs: List[str],
conts: List[str],
mask_length: Optional[List[int]] = None) -> List[float]:
assert isinstance(
inputs, List), f'List(str) is expected, but got {type(inputs)}'
results = []
for text, cont in zip(inputs, conts):
input_ids = self.tokenizer.encode(text)
res = self.generators[0].get_ppl(input_ids)
logit_sum = res * len(input_ids)
input_ids = self.tokenizer.encode(text.replace(cont, ''))
res = self.generators[0].get_ppl(input_ids)
logit_part = res * len(input_ids)
results.append(-(logit_sum - logit_part))
results = np.concatenate(results)
return results
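Editor's note: the new `get_loglikelihood` recovers the continuation's log-probability from two perplexity calls. Assuming `get_ppl` returns the mean negative log-likelihood per token, multiplying by the token count gives a total NLL, and subtracting the prefix's total isolates the continuation. Note that `text.replace(cont, '')` strips every occurrence of `cont`, so this relies on the continuation appearing exactly once, at the end. A toy recomputation of the arithmetic (numbers invented):

```python
# Assuming get_ppl returns the mean NLL per token of a token sequence.
full_nll, full_len = 2.0, 10   # tokenized text = prefix + cont
pref_nll, pref_len = 1.8, 7    # tokenized text with cont removed

logit_sum = full_nll * full_len    # total NLL of the full text: 20.0
logit_part = pref_nll * pref_len   # total NLL of the prefix: 12.6
print(-(logit_sum - logit_part))   # log p(cont | prefix) ≈ -7.4
```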
@@ -60,14 +60,14 @@ class TurboMindAPIModel(BaseModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
temperature: float = 1.0,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -102,12 +102,12 @@ class TurboMindAPIModel(BaseModel):
"""
return self.token_bucket.get_token()
def _generate(self, prompt: str or PromptList, max_out_len: int,
def _generate(self, prompt: PromptType, max_out_len: int,
temperature: float, end_str: str) -> str:
"""Generate results given a list of inputs.
Args:
prompt (str or PromptList): A string or PromptDict.
prompt (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -58,14 +58,14 @@ class TurboMindTisModel(BaseModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
temperature: float = 1.0,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -96,12 +96,12 @@ class TurboMindTisModel(BaseModel):
"""
return self.token_bucket.get_token()
def _generate(self, prompt: str or PromptList, max_out_len: int,
def _generate(self, prompt: PromptType, max_out_len: int,
temperature: float) -> str:
"""Generate results given a list of inputs.
Args:
prompt (str or PromptList): A string or PromptDict.
prompt (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
import hashlib
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
def get_sign(appkey, udid, timestamp, secret):
original_str = f'{appkey}{udid}{timestamp}{secret}'
sign = ''
try:
md = hashlib.sha256()
md.update(original_str.encode('utf-8'))
bytes_result = md.digest()
for byte in bytes_result:
hex_value = format(byte, '02X')
sign += hex_value.upper()
except Exception as e:
print(e)
return sign
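Editor's note: the byte-by-byte hex loop in `get_sign` is equivalent to hashlib's built-in hex digest, uppercased; a quick self-check with dummy credentials:

```python
import hashlib

appkey, udid, timestamp, secret = 'ak', 'udid', '0', 'sk'  # dummy values
expected = hashlib.sha256(
    f'{appkey}{udid}{timestamp}{secret}'.encode('utf-8')).hexdigest().upper()
assert get_sign(appkey, udid, timestamp, secret) == expected
```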
class UniGPT(BaseAPIModel):
def __init__(
self,
path: str,
appkey: str,
secret: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
temperature: float = 0.2,
): # noqa E125
super().__init__(
path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry,
)
self.appkey = appkey
self.secret = secret
self.udid = str(uuid.uuid1())
self.url = url
self.model = path
self.temperature = temperature
def generate(self,
inputs: List[PromptType],
max_out_len: int = 512) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(self, input: PromptType, max_out_len: int = 512) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
for item in input:
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
elif item['role'] == 'BOT':
msg['role'] = 'assistant'
elif item['role'] == 'SYSTEM':
msg['role'] = 'system'
messages.append(msg)
data = {
'model': self.path,
'temperature': self.temperature,
'messages': messages,
'max_tokens': max_out_len,
}
timestamp = str(int(time.time()) * 1000)
headers = {
'appkey': self.appkey,
'sign': get_sign(self.appkey, self.udid, timestamp, self.secret),
'stream': 'false',
'timestamp': timestamp,
'udid': self.udid,
'censor': 'none',
}
for _ in range(self.retry):
try:
response = requests.post(self.url, json=data, headers=headers)
except Exception as e:
print(e)
continue
if response is None or response.status_code != 200:
code = response.status_code if response else -1
print(f'request err, status_code: {code}')
time.sleep(10)
continue
try:
response = response.json()
except Exception as e:
print(e)
continue
print(response)
if response.get('errorCode') == '8500502':
return 'context_length_exceeded'
return response['result']['choices'][0]['message']['content']
raise RuntimeError(f'Failed to respond in {self.retry} retries')
@@ -98,13 +98,13 @@ class XunFei(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -121,13 +121,13 @@ class XunFei(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> List[str]:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
import base64
import hashlib
import hmac
import random
import string
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
def generate_random_string(length=16):
"""生成随机串.
:param length: 随机串长度,默认为 16
:return: 随机串
"""
letters = string.ascii_letters + string.digits
rand_str = ''.join(random.choice(letters) for i in range(length))
return rand_str
def get_current_time(format='%Y-%m-%d %H:%M:%S'):
"""获取当前时间.
:param format: 时间格式,默认为 '%H:%M:%S'
:return: 当前时间字符串
"""
now = datetime.now()
time_str = now.strftime(format)
return time_str
def get_current_timestamp():
"""
获取当前时间时间戳
:return:
"""
timestamp_str = int(round(time.time() * 1000))
return str(timestamp_str)
def encode_base64_string(s):
"""对字符串进行 Base64 编码.
:param s: 字符串
:return: 编码后的字符串
"""
encoded = base64.b64encode(s).decode()
return encoded
def get_current_time_gmt_format():
"""
获取当前时间的GMT 时间
:return:
"""
GMT_FORMAT = '%a, %d %b %Y %H:%M:%SGMT+00:00'
now = datetime.now()
time_str = now.strftime(GMT_FORMAT)
return time_str
class Yayi(BaseAPIModel):
"""Model wrapper around SenseTime.
Args:
path (str): The name of SenseTime model.
e.g. `nova-ptc-xl-v1`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
url: str,
url_path: str,
x_tilake_app_key: str,
x_tilake_app_secret: str,
x_tilake_ca_sginature_method: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
temperature: float = 0.4,
):
super().__init__(
path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry,
)
self.url = url
self.url_path = url_path
self.X_TILAKE_APP_KEY = x_tilake_app_key
self.X_TILAKE_APP_SECRET = x_tilake_app_secret
self.X_TILAKE_CA_SGINATURE_METHOD = x_tilake_ca_sginature_method
self.temperature = temperature
self.model = path
def generate_signature(self, method, accept, content_type, date, url_path):
"""生成签名.
:param method:
:param accept:
:param content_type:
:param date:
:param url_path:
:return:
"""
string_to_sign = (method + '\n' + accept + '\n' + content_type + '\n' +
date + '\n' + url_path)
string_to_sign = string_to_sign.encode('utf-8')
secret_key = self.X_TILAKE_APP_SECRET.encode('utf-8')
signature = hmac.new(secret_key, string_to_sign,
hashlib.sha256).digest()
return encode_base64_string(signature)
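Editor's note: the signature is a standard HMAC-SHA256 over the newline-joined method, accept, content type, date, and URL path, base64-encoded. The same computation in standalone form (all values are dummies):

```python
import base64
import hashlib
import hmac

fields = ['POST', '*/*', 'application/json',
          'Mon, 01 Jan 2024 00:00:00GMT+00:00', '/v1/chat']  # dummy fields
secret = b'my-app-secret'                                    # dummy secret
mac = hmac.new(secret, '\n'.join(fields).encode('utf-8'), hashlib.sha256)
signature = base64.b64encode(mac.digest()).decode()
```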
def generate_header(self, content_type, accept, date, signature):
"""生成请求头参数.
:param content_type:
:param accept:
:return:
"""
headers = {
'x-tilake-app-key': self.X_TILAKE_APP_KEY,
'x-tilake-ca-signature-method': self.X_TILAKE_CA_SGINATURE_METHOD,
'x-tilake-ca-timestamp': get_current_timestamp(),
'x-tilake-ca-nonce': generate_random_string(),
'x-tilake-ca-signature': signature,
'Date': date,
'Content-Type': content_type,
'Accept': accept,
}
return headers
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'yayi' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
date = get_current_time_gmt_format()
content_type = 'application/json'
accept = '*/*'
method = 'POST'
data = {
'id': '001',  # request id; no need to change
'model': self.model,
'messages': messages,
'max_new_tokens': max_out_len,  # this and the params below can be tuned per task
'temperature': self.temperature,
'presence_penalty': 0.85,
'frequency_penalty': 0.16,
'do_sample': True,
'top_p': 1.0,
'top_k': -1,
}
for _ in range(self.retry):
signature_str = self.generate_signature(method=method,
accept=accept,
content_type=content_type,
date=date,
url_path=self.url_path)
headers = self.generate_header(content_type=content_type,
accept=accept,
date=date,
signature=signature_str)
try:
response = requests.post(self.url, json=data, headers=headers)
except Exception as e:
print(e)
continue
try:
response = response.json()
except Exception as e:
print(e)
continue
print(response)
try:
return response['data']['choices'][0]['message']['content']
except Exception as e:
print(e)
continue
raise RuntimeError(f'Failed to respond in {self.retry} retries')
@@ -44,13 +44,13 @@ class ZhiPuAI(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -67,13 +67,13 @@ class ZhiPuAI(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -2,8 +2,6 @@ import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
from httpx import ProxyError
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
@@ -59,13 +57,13 @@ class ZhiPuV2AI(BaseAPIModel):
def generate(
self,
inputs: List[str or PromptList],
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[str or PromptList]): A list of strings or PromptDicts.
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -82,13 +80,13 @@ class ZhiPuV2AI(BaseAPIModel):
def _generate(
self,
input: str or PromptList,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (str or PromptList): A string or PromptDict.
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
@@ -103,6 +101,8 @@ class ZhiPuV2AI(BaseAPIModel):
else:
messages = []
for item in input:
if not item['prompt']:
continue
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
@@ -115,11 +115,15 @@ class ZhiPuV2AI(BaseAPIModel):
data = {'model': self.model, 'messages': messages}
data.update(self.generation_kwargs)
from pprint import pprint
print('-' * 128)
pprint(data)
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
response = None
from httpx import ProxyError
try:
response = self.client.chat.completions.create(**data)
@@ -161,6 +165,8 @@ class ZhiPuV2AI(BaseAPIModel):
# msg = response['data']['choices'][0]['content']
else:
msg = response.choices[0].message.content
print('=' * 128)
print(msg)
return msg
# sensitive content, prompt overlength, network error
# or illegal prompt
@@ -120,7 +120,7 @@ class LMEvaluator:
meta: Optional[bool] = False,
infer_order: Optional[str] = 'random') -> Dict:
dup_indices = []
if type(predictions) == list:
if isinstance(predictions, list):
"""Apply to multi-model comparison."""
references = [{} for _ in range(len(predictions[0]['model_preds']))
] if references is None else references
@@ -137,7 +137,7 @@ class LMEvaluator:
if len(set(check)) == 1:
dup_indices.append(i)
elif type(predictions) == dict:
elif isinstance(predictions, dict):
"""Apply to single-model scoring."""
references = [{} for _ in range(len(predictions[0]['model_preds']))
] if references is None else references
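Editor's note: the switch from `type(x) == list` to `isinstance(x, list)` matters because an exact type check rejects list subclasses, while `isinstance` accepts them (and satisfies flake8's E721). A short demonstration:

```python
class MyPreds(list):  # hypothetical subclass, for illustration only
    pass

preds = MyPreds()
print(type(preds) == list)      # False: exact type check rejects subclasses
print(isinstance(preds, list))  # True
```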
@@ -77,7 +77,7 @@ class PromptTemplate:
label (:obj:`Hashable`): The value of the output field.
Returns:
str or PromptList: The generated in-context example.
PromptType: The generated in-context example.
"""
# Select the corresponding template
if isinstance(self.template, str) or self.prompt_type == 'meta':
@@ -114,7 +114,7 @@ class PromptTemplate:
entry (:obj:`Dict`): A piece of data containing the input field
content.
ice (str or PromptList): The generated in-context example.
ice (PromptType): The generated in-context example.
label (:obj:`Hashable`): The value of the output field.
remain_sep (:obj:`bool`): If remain sep_token
@@ -165,7 +165,7 @@ class PromptTemplate:
the :obj:`ice_token`. Defaults to ``''``.
Returns:
str or PromptList: The generated item.
PromptType: The generated item.
"""
template = None
if isinstance(self.template, str):
@@ -220,7 +220,7 @@ class PromptTemplate:
examples.
Returns:
str or PromptList: The encoded template.
PromptType: The encoded template.
"""
if isinstance(prompt_template, str):
return prompt_template
from typing import Callable, List, Optional, Type, Union
from mmengine.registry import DATASETS as MMENGINE_DATASETS
from mmengine.registry import METRICS as MMENGINE_METRICS
from mmengine.registry import MODELS as MMENGINE_MODELS
from mmengine.registry import Registry
from mmengine.registry import Registry as OriginalRegistry
class Registry(OriginalRegistry):
# override the default force behavior
def register_module(
self,
name: Optional[Union[str, List[str]]] = None,
force: bool = True,
module: Optional[Type] = None) -> Union[type, Callable]:
return super().register_module(name, force, module)
PARTITIONERS = Registry('partitioner', locations=['opencompass.partitioners'])
RUNNERS = Registry('runner', locations=['opencompass.runners'])
@@ -118,6 +118,7 @@ class DLCRunner(BaseRunner):
conda_env_name = self.aliyun_cfg['conda_env_name']
shell_cmd = (f'source {bashrc_path}; '
f'conda activate {conda_env_name}; ')
shell_cmd += f'export PYTHONPATH={pwd}:$PYTHONPATH; '
else:
# using public conda env
# users can also set `python_env_path` to their
@@ -151,6 +152,11 @@ class DLCRunner(BaseRunner):
if hf_endpoint is not None:
shell_cmd += f'export HF_ENDPOINT={hf_endpoint}; '
extra_envs = self.aliyun_cfg.get('extra_envs')
if extra_envs is not None:
for extra_env in extra_envs:
shell_cmd += f'export {extra_env}; '
shell_cmd += f'cd {pwd}; '
shell_cmd += '{task_cmd}'
@@ -161,9 +167,9 @@ class DLCRunner(BaseRunner):
f" -c {self.aliyun_cfg['dlc_config_path']}"
f" --workspace_id {self.aliyun_cfg['workspace_id']}"
' --worker_count 1'
f' --worker_cpu {max(num_gpus * 8, 32)}'
f' --worker_cpu {max(num_gpus * 8, 12)}'
f' --worker_gpu {num_gpus}'
f' --worker_memory {max(num_gpus * 128, 256)}'
f' --worker_memory {max(num_gpus * 128, 192)}'
f" --worker_image {self.aliyun_cfg['worker_image']}")
get_cmd = partial(task.get_command,
cfg_path=param_file,
@@ -185,14 +191,25 @@ class DLCRunner(BaseRunner):
time.sleep(random.randint(0, 10))
def _run_within_retry():
output = subprocess.getoutput(cmd)
match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
if match is None:
raise RuntimeError(
f'Failed to launch dlc job for {output}')
num_retry_to_start = 5
index_to_start = 0
while index_to_start < num_retry_to_start:
index_to_start += 1
output = subprocess.getoutput(cmd)
match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
if match is None:
stdout.write('Failed to get job id from output:')
stdout.write(output)
if index_to_start < num_retry_to_start:
stdout.write(f'Retry #{index_to_start} starting')
time.sleep(2)
continue
else:
job_id = match.group(1)
stdout.write(output)
break
else:
job_id = match.group(1)
stdout.write(output)
raise RuntimeError(f'Cannot get job id from {output}')
pod_create_time = None
pri_time = None
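Editor's note: launching the DLC job is now retried; the job id is parsed out of the CLI output up to five times before the runner gives up. The same pattern condensed into a helper (`launch` stands in for `subprocess.getoutput(cmd)`):

```python
import re
import time

def launch_with_retry(launch, num_retry=5,
                      pattern=r'\|\s+(dlc[0-9a-z]+)\s+\|'):
    """Retry a flaky CLI launch until its output contains a job id."""
    for attempt in range(1, num_retry + 1):
        output = launch()  # e.g. subprocess.getoutput(cmd)
        match = re.search(pattern, output)
        if match:
            return match.group(1)  # the dlc job id
        if attempt < num_retry:
            time.sleep(2)  # brief pause before the next attempt
    raise RuntimeError(f'Cannot get job id from {output}')
```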
@@ -200,7 +217,7 @@
while True:
# 1. Avoid to request dlc too frequently.
# 2. DLC job may not be ready immediately after creation.
for _ in range(5):
for _ in range(20):
time.sleep(2)
try:
job_info = json.loads(
@@ -17,7 +17,7 @@ from opencompass.utils import (LarkReporter, dataset_abbr_from_cfg,
from opencompass.utils.prompt import get_prompt_hash
METRIC_WHITELIST = ['score', 'auc_score', 'accuracy', 'humaneval_pass@1', 'rouge1', 'avg_toxicity_score', 'bleurt_diff', 'matthews_correlation', 'truth', 'f1', 'exact_match']
METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len']
METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len', 'tool_rate']
def model_abbr_from_cfg_used_in_summarizer(model):
if model.get('summarizer_abbr', None):
......
@@ -218,8 +218,9 @@ def get_dimension_results(judged_answers, references, fout, fout_flag, model):
dimension_avg_ratings = defaultdict(float)
for dimension, total_score in dimension_ratings.items():
dimension_avg_ratings[
dimension] = total_score / dimension_counts[dimension]
s = total_score / dimension_counts[dimension]
s = round(s, 2)
dimension_avg_ratings[dimension] = s
scores = {model: dimension_avg_ratings}
rows = list(scores.keys())
@@ -249,8 +250,9 @@ def get_capability_results(judged_answers,
capability_avg_ratings = defaultdict(float)
for capability, total_score in capability_ratings.items():
capability_avg_ratings[
capability] = total_score / capability_counts[capability]
s = total_score / capability_counts[capability]
s = round(s, 2)
capability_avg_ratings[capability] = s
temp_list = []
total_column_num = 2
@@ -260,11 +262,14 @@
np.mean(capability_avg_ratings[cat])
for cat in categories[category]
])
capability_avg_ratings[category + '总分'] = round(
capability_avg_ratings[category + '总分'], 2)
temp_list.append(category + '总分')
capability_avg_ratings['总分'] = 0
for temp in temp_list:
capability_avg_ratings['总分'] += capability_avg_ratings[temp]
capability_avg_ratings['总分'] /= len(temp_list)
capability_avg_ratings['总分'] = round(capability_avg_ratings['总分'], 2)
scores = {model: capability_avg_ratings}
with open(fout, 'a+', newline='') as csvfile:
@@ -365,8 +370,10 @@ class AlignmentBenchSummarizer:
print(subdir_path + ' does not exist! Please check!')
if self.judge_type == 'general':
with open(fout, 'r') as f:
x = from_csv(f)
x = from_csv(f, delimiter=',')
print(x)
print(fout)
with open(fout2, 'r') as f:
x = from_csv(f)
x = from_csv(f, delimiter=',')
print(x)
print(fout2)
@@ -229,4 +229,5 @@ class CompassArenaSummarizer:
for fout in fout_list:
with open(fout, 'r') as f:
x = from_csv(f)
print(fout)
print(x)
@@ -65,8 +65,9 @@ def get_capability_results(
capability_avg_ratings = defaultdict(float)
for capability, total_score in capability_ratings.items():
capability_avg_ratings[
capability] = total_score / capability_counts[capability]
s = total_score / capability_counts[capability]
s = round(s, 2)
capability_avg_ratings[capability] = s
columns = list(capability_avg_ratings.keys())
columns.insert(0, columns.pop(columns.index('total')))
with open(fout, 'a+', newline='') as csvfile:
@@ -142,5 +143,6 @@ class MTBenchSummarizer(CompassArenaSummarizer):
with open(fout, 'r') as f:
x = from_csv(f)
print(x)
print(fout)
elif self.judge_type == 'pair':
super().summarize()
@@ -43,9 +43,12 @@ class OpenICLInferTask(BaseTask):
the command.
"""
script_path = __file__
has_vllm = ('VLLM' in str(self.model_cfgs[0].get('type', ''))) or \
'VLLM' in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
if self.num_gpus > 0 and not has_vllm:
backend_keys = ['VLLM', 'Lmdeploy']
use_backend = any(
key in str(self.model_cfgs[0].get('type', ''))
or key in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
for key in backend_keys)
if self.num_gpus > 0 and not use_backend:
port = random.randint(12000, 32000)
command = (f'torchrun --master_port={port} '
f'--nproc_per_node {self.num_procs} '
......
@@ -120,7 +120,10 @@ class AlpacaEvalTask(BaseTask):
filename = get_infer_output_path(m_cfg, dataset_cfg,
osp.join(work_dir, 'predictions'))
output_path = osp.join(work_dir, 'results', m_cfg['abbr'])
command = f'export OPENAI_API_KEY={api_key}; alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
command = ''
if api_key is not None:
command += f'export OPENAI_API_KEY={api_key}; '
command += f'alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
return template.format(task_cmd=command)
def run(self):