"git@developer.sourcefind.cn:cnjsdfcy/simbricks.git" did not exist on "565da1fb1e2638c25644862966b5541f7b54f06f"
Unverified commit 62dbf047, authored by Fengzhe Zhou, committed via GitHub

[Sync] update github workflow (#1156)

parent aa2dd2b5
from opencompass.models import TurboMindModel
settings = [
('qwen-1.8b-turbomind', 'Qwen/Qwen-1_8B', 1),
('qwen-7b-turbomind', 'Qwen/Qwen-7B', 1),
('qwen-14b-turbomind', 'Qwen/Qwen-14B', 1),
('qwen-72b-turbomind', 'Qwen/Qwen-72B', 4),
]
models = []
for abbr, path, num_gpus in settings:
models.append(
dict(
type=TurboMindModel,
abbr=abbr,
path=path,
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
max_out_len=1024,
max_seq_len=2048,
batch_size=16,
concurrency=16,
run_cfg=dict(num_gpus=num_gpus),
)
)
from opencompass.models import LmdeployPytorchModel
settings = [
('yi-6b-pytorch', '01-ai/Yi-6B', 1),
('yi-34b-pytorch', '01-ai/Yi-34B', 2),
]
models = []
for abbr, path, num_gpus in settings:
models.append(
dict(
type=LmdeployPytorchModel,
abbr=abbr,
path=path,
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
max_out_len=1024,
max_seq_len=2048,
batch_size=16,
concurrency=16,
run_cfg=dict(num_gpus=num_gpus),
)
)
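For context, model lists like the two above are normally pulled into a top-level evaluation config via mmengine's `read_base`; a minimal sketch of that wiring follows (the import paths are placeholders, not files from this commit):

from mmengine.config import read_base

with read_base():
    # hypothetical module paths; point these at wherever the two model lists live
    from .models.qwen.lmdeploy_qwen import models as turbomind_qwen_models
    from .models.yi.pytorch_yi import models as pytorch_yi_models

# concatenate the per-engine lists into the final `models` used by the run
models = [*turbomind_qwen_models, *pytorch_yi_models]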
@@ -2,7 +2,7 @@
from mmengine.config import read_base
with read_base():
from .groups.cibench import cibench_summary_groups
from .groups.legacy.cibench import cibench_summary_groups
from .groups.plugineval import plugineval_summary_groups
......
_cibench = ['Pandas', 'Matplotlib', 'Opencv', 'SciPy', 'Seaborn', 'PyTorch']
_cibench = ['cibench_' + i for i in _cibench]
cibench_summary_groups = [{'name': 'cibench', 'subsets': _cibench}]
_cibench_template = ['lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
'scipy', 'seaborn', 'sklearn', 'tensorflow']
_cibench_template = ['cibench_template/' + i for i in _cibench_template]
# number of questions per library for each metric, ordered as [executable, numeric_correct, text_score, vis_sim]
_cibench_template_weight = {
'lightgbm': [30, 15, 0, 0],
'matplotlib': [42, 0, 0, 36],
'nltk': [70, 30, 20, 10],
'opencv': [60, 10, 0, 40],
'pandas': [60, 40, 0, 10],
'pytorch': [28, 0, 0, 0],
'scipy': [60, 40, 0, 0],
'seaborn': [42, 0, 0, 35],
'sklearn': [42, 6, 0, 18],
'tensorflow': [36, 6, 0, 12],
}
cibench_summary_groups.extend([
{
'name': 'cibench_template:executable',
'subsets': [[i, 'executable'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[0] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[1] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:text_score',
'subsets': [[i, 'text_score'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[2] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[3] for k,v in _cibench_template_weight.items()},
},
])
## chinese
_cibench_template_cn = ['lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
'scipy', 'seaborn', 'sklearn', 'tensorflow']
_cibench_template_cn = ['cibench_template_chinese/' + i for i in _cibench_template_cn]
cibench_summary_groups.extend([
{
'name': 'cibench_template_cn:executable',
'subsets': [[i, 'executable'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[0] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[1] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:text_score',
'subsets': [[i, 'text_score'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[2] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[3] for k,v in _cibench_template_weight.items()},
},
])
## add more without nltk
cibench_summary_groups.extend([
{
'name': 'cibench_template_wo_nltk:executable',
'subsets': [[i, 'executable'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[0] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_wo_nltk:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[1] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_wo_nltk:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[3] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
])
cibench_summary_groups.extend([
{
'name': 'cibench_template_cn_wo_nltk:executable',
'subsets': [[i, 'executable'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[0] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_cn_wo_nltk:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[1] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_cn_wo_nltk:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[3] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
])
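Assuming the summarizer treats `weights` as per-subset question counts (which is what the comment above suggests), each group score reduces to a weighted mean; a standalone sketch with made-up subset scores:

# hypothetical per-subset accuracies; weights taken from the table above
subset_scores = {'cibench_template/lightgbm': 80.0, 'cibench_template/matplotlib': 65.0}
weights = {'cibench_template/lightgbm': 30, 'cibench_template/matplotlib': 42}

total = sum(weights[k] for k in subset_scores)
group_score = sum(subset_scores[k] * weights[k] for k in subset_scores) / total
print(round(group_score, 2))  # 71.25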
@@ -170,6 +170,8 @@ def parse_dlc_args(dlc_parser):
type=str)
def parse_hf_args(hf_parser):
"""These args are all for the quick construction of HuggingFace models."""
hf_parser.add_argument('--hf-type', type=str, choices=['base', 'chat'], default='chat', help='The type of the HuggingFace model, base or chat')
@@ -212,7 +214,7 @@ def main():
if args.work_dir is not None:
cfg['work_dir'] = args.work_dir
else:
cfg.setdefault('work_dir', osp.join('outputs', 'default'))
cfg.setdefault('work_dir', os.path.join('outputs', 'default'))
# cfg_time_str defaults to the current time
cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S')
@@ -340,5 +342,6 @@ def main():
summarizer.summarize(time_str=cfg_time_str)
if __name__ == '__main__':
main()
@@ -7,7 +7,8 @@ from .base import BaseModel, LMTemplateParser # noqa: F401
from .base_api import APITemplateParser, BaseAPIModel # noqa: F401
from .bytedance_api import ByteDance # noqa: F401
from .claude_api import Claude # noqa: F401
from .gemini_api import Gemini, GeminiAllesAPIN # noqa: F401
from .deepseek_api import DeepseekAPI # noqa: F401
from .gemini_api import Gemini # noqa: F401
from .glm import GLM130B # noqa: F401
from .huggingface import HuggingFace # noqa: F401
from .huggingface import HuggingFaceCausalLM # noqa: F401
@@ -21,7 +22,7 @@ from .lightllm_api import LightllmAPI # noqa: F401
from .llama2 import Llama2, Llama2Chat # noqa: F401
from .lmdeploy_pytorch import LmdeployPytorchModel # noqa: F401
from .lmdeploy_tis import LmdeployTisModel # noqa: F401
from .minimax_api import MiniMax # noqa: F401
from .minimax_api import MiniMax, MiniMaxChatCompletionV2 # noqa: F401
from .mistral_api import Mistral # noqa: F401
from .mixtral import Mixtral # noqa: F401
from .modelscope import ModelScope, ModelScopeCausalLM # noqa: F401
@@ -31,11 +32,12 @@ from .openai_api import OpenAI # noqa: F401
from .pangu_api import PanGu # noqa: F401
from .qwen_api import Qwen # noqa: F401
from .sensetime_api import SenseTime # noqa: F401
from .stepfun_api import StepFun # noqa: F401
from .turbomind import TurboMindModel # noqa: F401
from .turbomind_tis import TurboMindTisModel # noqa: F401
from .unigpt_api import UniGPT # noqa: F401
from .vllm import VLLM # noqa: F401
from .xunfei_api import XunFei # noqa: F401
from .xunfei_api import XunFei, XunFeiSpark # noqa: F401
from .yayi_api import Yayi # noqa: F401
from .zhipuai_api import ZhiPuAI # noqa: F401
from .zhipuai_v2_api import ZhiPuV2AI # noqa: F401
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
@@ -141,29 +140,32 @@ class AI360GPT(BaseAPIModel):
self.wait()
continue
if raw_response.status_code == 200:
try:
msg = response['choices'][0]['message']['content'].strip()
return msg
except KeyError:
if 'error' in response:
# tpm (tokens per minute) limit
if response['erro']['code'] == '1005':
time.sleep(1)
continue
self.logger.error('Find error message in response: ',
str(response['error']))
msg = response['choices'][0]['message']['content'].strip()
self.logger.debug(f'Generated: {msg}')
return msg
# sensitive content, prompt overlength, network error
# or illegal prompt
if (raw_response.status_code == 400
or raw_response.status_code == 401
or raw_response.status_code == 402
or raw_response.status_code == 429
or raw_response.status_code == 500):
print(raw_response.text)
continue
if raw_response.status_code in [400, 401, 402, 429, 500]:
if 'error' not in response:
print(raw_response.status_code)
print(raw_response.text)
continue
print(response)
# tpm (tokens per minute) limit
if response['error']['code'] == '1005':
self.logger.debug('tpm limit, ignoring')
continue
elif response['error']['code'] == '1001':
msg = '参数错误:messages参数过长或max_tokens参数值过大'  # parameter error: messages too long or max_tokens too large
self.logger.debug(f'Generated: {msg}')
return msg
else:
print(response)
self.logger.error('Find error message in response: ',
str(response['error']))
print(raw_response)
max_num_retries += 1
......
@@ -145,8 +145,8 @@ class BaiChuan(BaseAPIModel):
self.wait()
continue
if raw_response.status_code == 200:
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
if raw_response.status_code != 200:
......
@@ -53,6 +53,8 @@ class ERNIEBot(BaseAPIModel):
self.headers = {'Content_Type': 'application/json'}
self.secretkey = secretkey
self.key = key
if not url.endswith('?access_token='):
url += '?access_token='
self.url = url
access_token, _ = self._generate_access_token()
self.access_token = access_token
@@ -143,14 +145,25 @@ class ERNIEBot(BaseAPIModel):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
msg = {'content': item['prompt']}
if item['role'] == 'HUMAN':
msg['role'] = 'user'
elif item['role'] == 'BOT':
msg['role'] = 'assistant'
if item['role'] == 'BOT':
role = 'assistant'
else: # USER or SYSTEM
role = 'user'
if role != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = role
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
messages.append(msg)
data = {'messages': messages}
data.update(self.generation_kwargs)
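The buffering loop above collapses consecutive turns with the same role into one message, so the API always receives alternating user/assistant turns; the same pattern is reused in the new DeepseekAPI, MiniMaxChatCompletionV2 and StepFun wrappers below. A standalone sketch of the merging, with made-up turns:

input = [
    {'role': 'SYSTEM', 'prompt': 'You are helpful.'},
    {'role': 'HUMAN', 'prompt': 'Hi'},
    {'role': 'BOT', 'prompt': 'Hello!'},
    {'role': 'HUMAN', 'prompt': 'Part 1'},
    {'role': 'HUMAN', 'prompt': 'Part 2'},  # same role as previous turn -> merged
]

messages, msg_buffer, last_role = [], [], None
for item in input:
    role = 'assistant' if item['role'] == 'BOT' else 'user'  # SYSTEM/HUMAN -> user
    if role != last_role and last_role is not None:
        messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
        msg_buffer = []
    msg_buffer.append(item['prompt'])
    last_role = role
messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
# messages == [
#     {'content': 'You are helpful.\nHi', 'role': 'user'},
#     {'content': 'Hello!', 'role': 'assistant'},
#     {'content': 'Part 1\nPart 2', 'role': 'user'},
# ]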
@@ -181,6 +194,7 @@ class ERNIEBot(BaseAPIModel):
if raw_response.status_code == 200:
try:
msg = response['result']
self.logger.debug(msg)
return msg
except KeyError:
print(response)
@@ -188,9 +202,12 @@
if response['error_code'] == 336007:
# exceed max length
return ''
time.sleep(1)
continue
elif response['error_code'] == 336103:
# prompt tokens too long
return ''
else:
time.sleep(1)
continue
if (response['error_code'] == 110 or response['error_code'] == 100
or response['error_code'] == 111
......
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
class DeepseekAPI(BaseAPIModel):
"""Model wrapper around DeepseekAPI.
Documentation:
Args:
path (str): The name of the Deepseek model to call, e.g. `deepseek-chat`.
key (str): Authorization key.
url (str): The API endpoint to request.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
system_prompt: str = '',
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
self.system_prompt = system_prompt
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
if self.system_prompt:
system = {'role': 'system', 'content': self.system_prompt}
messages.insert(0, system)
data = {'model': self.model, 'messages': messages}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
try:
response = raw_response.json()
except Exception as err:
print('Response Error:{}'.format(err))
response = None
self.release()
if response is None:
print('Connection error, reconnect.')
# on connection errors, rapid retries tend to keep the
# network unstable, so wait here to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
# msg = json.load(response.text)
# response
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
if raw_response.status_code == 401:
print('请求被拒绝 api_key错误')  # request rejected: invalid api_key
continue
elif raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)  # request failed, status code:
msg = 'The request was rejected because high risk'
return msg
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
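A hypothetical model entry using the new wrapper might look like the sketch below (the abbr, key and url are placeholders, not values from this commit; StepFun and MiniMaxChatCompletionV2 are configured the same way):

from opencompass.models import DeepseekAPI

models = [
    dict(
        type=DeepseekAPI,
        abbr='deepseek-chat-api',           # placeholder abbreviation
        path='deepseek-chat',               # model name, forwarded as `path`
        key='YOUR_API_KEY',                 # supply a real key via your own config
        url='https://api.deepseek.com/chat/completions',  # assumed endpoint
        query_per_second=2,
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
    )
]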
@@ -186,66 +186,3 @@ class Gemini(BaseAPIModel):
time.sleep(1)
raise RuntimeError('API call failed.')
class GeminiAllesAPIN(Gemini):
"""Model wrapper around Gemini models.
Documentation:
Args:
path (str): The name of Gemini model.
e.g. `gemini-pro`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
temperature: float = 1.0,
top_p: float = 0.8,
top_k: float = 10.0,
):
super().__init__(key=key,
path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
# Replace the url and headers into AllesApin
self.url = url
self.headers = {
'alles-apin-token': key,
'content-type': 'application/json',
}
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
return super().generate(inputs, max_out_len)
@@ -289,13 +289,13 @@ class HuggingFace(BaseModel):
tokens = self.tokenizer.batch_encode_plus(inputs,
padding=True,
truncation=True,
max_length=self.max_seq_len -
max_out_len)
max_length=self.max_seq_len)
tokens = {
k: torch.tensor(np.array(tokens[k]), device=self.model.device)
for k in tokens if k in ['input_ids', 'attention_mask']
}
origin_stopping_criteria = stopping_criteria
if stopping_criteria:
# Construct huggingface stopping criteria
if self.tokenizer.eos_token is not None:
@@ -332,6 +332,9 @@ class HuggingFace(BaseModel):
if self.end_str:
decodeds = [token.split(self.end_str)[0] for token in decodeds]
if origin_stopping_criteria:
for t in origin_stopping_criteria:
decodeds = [token.split(t)[0] for token in decodeds]
return decodeds
def _single_generate(self,
@@ -382,6 +385,7 @@ class HuggingFace(BaseModel):
max_length=self.max_seq_len -
max_out_len)['input_ids']
input_ids = torch.tensor(input_ids, device=self.model.device)
origin_stopping_criteria = stopping_criteria
if stopping_criteria:
# Construct huggingface stopping criteria
if self.tokenizer.eos_token is not None:
@@ -419,6 +423,9 @@ class HuggingFace(BaseModel):
if self.end_str:
decodeds = [token.split(self.end_str)[0] for token in decodeds]
if origin_stopping_criteria:
for t in origin_stopping_criteria:
decodeds = [token.split(t)[0] for token in decodeds]
return decodeds
def get_logits(self, inputs: List[str]):
......
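The two `origin_stopping_criteria` additions post-trim the decoded text on every stop string, mirroring what `end_str` already did; in isolation the trimming is just:

# standalone illustration of the stop-string trimming added above
decodeds = ['Answer: A\nQuestion: next item', 'Answer: B']
stopping_criteria = ['Question:']
for t in stopping_criteria:
    decodeds = [token.split(t)[0] for token in decodeds]
print(decodeds)  # ['Answer: A\n', 'Answer: B']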
@@ -180,3 +180,173 @@ class MiniMax(BaseAPIModel):
max_num_retries += 1
raise RuntimeError(response.text)
class MiniMaxChatCompletionV2(BaseAPIModel):
"""Model wrapper around MiniMax ChatCompletionV2.
Documentation:
Args:
path (str): The name of the MiniMax model to call, e.g. `abab5.5-chat`.
key (str): Authorization key.
url (str): The API endpoint to request.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
data = {
'model': self.model,
'messages': messages,
'max_tokens': max_out_len
}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
response = raw_response.json()
self.release()
if response is None:
print('Connection error, reconnect.')
# on connection errors, rapid retries tend to keep the
# network unstable, so wait here to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
try:
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
except Exception:
code = response.get('base_resp', {}).get('status_code')
if code == 1002:
# rate limit
time.sleep(1)
continue
elif code == 1027:
return 'The request was rejected because high risk'
print(messages, response)
pass
elif raw_response.status_code == 401:
print('请求被拒绝 api_key错误')  # request rejected: invalid api_key
continue
elif raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)  # request failed, status code:
msg = 'The request was rejected because high risk'
return msg
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
@@ -152,8 +152,7 @@ class Qwen(BaseAPIModel):
if response.status_code == 200:
try:
msg = response.output.text
print('=' * 128)
print(msg)
self.logger.debug(msg)
return msg
except KeyError:
print(response)
......
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
class StepFun(BaseAPIModel):
"""Model wrapper around StepFun.
Documentation:
Args:
path (str): The name of the StepFun model to call.
key (str): Authorization key.
url (str): The API endpoint to request.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
system_prompt: str = '',
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
self.system_prompt = system_prompt
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
if self.system_prompt:
system = {'role': 'system', 'content': self.system_prompt}
messages.insert(0, system)
data = {'model': self.model, 'messages': messages}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
try:
response = raw_response.json()
except Exception:
response = None
self.release()
if response is None:
print('Connection error, reconnect.')
# on connection errors, rapid retries tend to keep the
# network unstable, so wait here to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
# msg = json.load(response.text)
# response
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
if raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)  # request failed, status code:
msg = 'The context length exceeded'
return msg
elif raw_response.status_code == 403:
print('请求被拒绝 api_key错误')  # request rejected: invalid api_key
continue
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
elif raw_response.status_code == 451:
print(messages, response)
print('请求失败,状态码:', raw_response)
msg = 'The request was rejected because high risk'
return msg
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
@@ -55,9 +55,6 @@ class TurboMindModel(BaseModel):
if engine_config is not None:
from lmdeploy.messages import TurbomindEngineConfig
engine_config = TurbomindEngineConfig(**engine_config)
if gen_config is not None:
from lmdeploy.messages import EngineGenerationConfig
gen_config = EngineGenerationConfig(**gen_config)
self.logger = get_logger()
tm_model = TurboMind.from_pretrained(path, engine_config=engine_config)
self.tokenizer = tm_model.tokenizer
@@ -106,6 +103,7 @@ class TurboMindModel(BaseModel):
t = self.tokenizer.encode(t, add_bos=False)
stop_words.append(t[0])
gen_config['stop_words'] = list(set(stop_words))
gen_config.setdefault('min_new_tokens', 1)
from lmdeploy.messages import EngineGenerationConfig
gen_config = EngineGenerationConfig(**gen_config)
@@ -123,6 +121,9 @@ class TurboMindModel(BaseModel):
[gen_config] * len(batch_input),
))
results += _results
if stopping_criteria:
for s in stopping_criteria:
results = [r.split(s)[0] for r in results]
return results
def get_token_len(self, prompt: str) -> int:
......
import json
import re
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
@@ -221,3 +223,150 @@ class XunFei(BaseAPIModel):
if err_code == 10013:
return err_data['header']['message']
raise RuntimeError(f'Code: {err_code}, data: {err_data}')
class XunFeiSpark(BaseAPIModel):
"""Model wrapper around XunFeiSpark.
Documentation:
Args:
path (str): The Spark LLM domain, passed to ChatSparkLLM as `spark_llm_domain`.
url (str): The Spark API url.
app_id (str): Application id used for authentication.
api_key (str): API key used for authentication.
api_secret (str): API secret used for authentication.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
url: str,
app_id: str,
api_key: str,
api_secret: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
try:
from sparkai.llm.llm import ChatSparkLLM # noqa: F401
except ImportError:
raise ImportError('run `pip install --upgrade spark_ai_python`')
self.spark_domain = path
self.url = url
self.app_id = app_id
self.api_key = api_key
self.api_secret = api_secret
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
results = [self._generate(input, max_out_len) for input in inputs]
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
assert isinstance(input, (str, PromptList))
from sparkai.core.messages import ChatMessage
from sparkai.llm.llm import ChatSparkLLM
if isinstance(input, str):
messages = [ChatMessage(role='user', content=input)]
else:
messages = []
msg_buffer, last_role = [], None
for index, item in enumerate(input):
if index == 0 and item['role'] == 'SYSTEM':
role = 'system'
elif item['role'] == 'BOT':
role = 'assistant'
else:
role = 'user'
if role != last_role and last_role is not None:
content = '\n'.join(msg_buffer)
messages.append(
ChatMessage(role=last_role, content=content))
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = role
content = '\n'.join(msg_buffer)
messages.append(ChatMessage(role=last_role, content=content))
spark = ChatSparkLLM(
spark_api_url=self.url,
spark_app_id=self.app_id,
spark_api_key=self.api_key,
spark_api_secret=self.api_secret,
spark_llm_domain=self.spark_domain,
streaming=False,
max_tokens=max_out_len,
)
all_empty_response = True
for _ in range(self.retry + 1):
try:
outputs = spark.generate([messages]).generations[0]
if len(outputs) == 0:
self.logger.error('Empty response, retrying...')
continue
msg = outputs[0].text
self.logger.debug(f'Generated: {msg}')
return msg
except ConnectionError as e:
match = re.match(r'Error Code: (\d+), Error: (.*)',
e.args[0],
flags=re.DOTALL)
if match:
error_code = int(match.group(1))
msg = match.group(2)
if error_code == 10003: # query data exceed limit
self.logger.error(f'Error {error_code}: {msg}')
return msg
elif error_code in [10013, 10014]: # skip safety problem
self.logger.debug(f'Generated: {msg}')
return msg
elif error_code == 10020: # plugin result is empty
self.logger.error(f'Error {error_code}: {msg}')
return msg
elif error_code == 11202: # qps limit
time.sleep(1)
else:
self.logger.error(f'Error {error_code}: {msg}')
raise e
raise e
except TimeoutError:
self.logger.error('TimeoutError, sleep 60, retrying...')
time.sleep(60)
except Exception as e:
self.logger.error(str(e))
pass
all_empty_response = False
if all_empty_response:
self.logger.error('All empty response')
return 'all empty response'
raise RuntimeError('Failed to generate response')
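The retry loop above distinguishes Spark error codes by parsing the exception message; a standalone check of that parsing, with a made-up error string:

import re

err = 'Error Code: 11202, Error: qps limit exceeded'  # hypothetical message
match = re.match(r'Error Code: (\d+), Error: (.*)', err, flags=re.DOTALL)
if match:
    error_code, msg = int(match.group(1)), match.group(2)
    print(error_code, msg)  # 11202 qps limit exceeded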
@@ -141,7 +141,7 @@ class DLCRunner(BaseRunner):
hf_offline = self.aliyun_cfg.get('hf_offline', True)
if hf_offline:
shell_cmd += 'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; ' # noqa: E501
shell_cmd += 'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; export HF_HUB_OFFLINE=1; ' # noqa: E501
http_proxy = self.aliyun_cfg.get('http_proxy')
if http_proxy is not None:
@@ -158,6 +158,7 @@ class DLCRunner(BaseRunner):
shell_cmd += f'export {extra_env}; '
shell_cmd += f'cd {pwd}; '
shell_cmd += 'umask 0000; '
shell_cmd += '{task_cmd}'
tmpl = ('dlc create job'
@@ -195,7 +196,10 @@
index_to_start = 0
while index_to_start < num_retry_to_start:
index_to_start += 1
output = subprocess.getoutput(cmd)
try:
output = subprocess.getoutput(cmd)
except BlockingIOError:
output = ''
match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
if match is None:
stdout.write('Failed to get job id from output:')
@@ -264,7 +268,10 @@
f" -c {self.aliyun_cfg['dlc_config_path']}"
f' --start_time {pri_time}'
f' --end_time {cur_time}')
log_output = subprocess.getoutput(logs_cmd)
try:
log_output = subprocess.getoutput(logs_cmd)
except BlockingIOError:
log_output = '[WARN] No logs found for the pod'
if '[WARN] No logs found for the pod' not in log_output:
pri_time = cur_time
......
@@ -46,17 +46,19 @@ class LocalRunner(BaseRunner):
lark_bot_url (str): Lark bot url.
"""
def __init__(
self,
task: ConfigDict,
max_num_workers: int = 16,
debug: bool = False,
max_workers_per_gpu: int = 1,
lark_bot_url: str = None,
):
def __init__(self,
task: ConfigDict,
max_num_workers: int = 16,
debug: bool = False,
max_workers_per_gpu: int = 1,
lark_bot_url: str = None,
**kwargs):
super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
self.max_num_workers = max_num_workers
self.max_workers_per_gpu = max_workers_per_gpu
logger = get_logger()
for k, v in kwargs.items():
logger.warning(f'Ignored argument in {self.__module__}: {k}={v}')
def launch(self, tasks: List[Dict[str, Any]]) -> List[Tuple[str, int]]:
"""Launch multiple tasks.
......
@@ -94,11 +94,11 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
f'答案是\s?(\S+)(?:。|$)',
f'答案应该是\s?(\S+)(?:。|$)',
f'答案为\s?(\S+)(?:。|$)',
f'[Tt]he answer is \(?([{options}])\)?',
f'[Tt]he answer is option \(?([{options}])\)?',
f'[Tt]he correct answer is \(?([{options}])\)?',
f'[Tt]he correct answer is option \(?([{options}])\)?',
f'[Tt]he answer to the question is \(?([{options}])\)?',
f'[Tt]he answer is:?\s+\(?([{options}])\)?',
f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
f'[Tt]he correct answer is option:?\s+\(?([{options}])\)?',
f'[Tt]he answer to the question is:?\s+\(?([{options}])\)?',
f'^选项\s?([{options}])',
f'^([{options}])\s?选?项',
f'(\s|^)[{options}][\s。,,::\.$]',
@@ -116,7 +116,7 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
if cushion:
patterns.extend(cushion_patterns)
for pattern in patterns:
match = re.search(pattern, text)
match = re.search(pattern, text, re.DOTALL)
if match:
outputs = match.group(0)
for i in options:
......
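The loosened patterns (optional colon, flexible whitespace) let the extractor catch phrasings such as 'The answer is: B', and the search now passes `re.DOTALL` so `.` in any pattern can also span newlines. A quick standalone check of one of the new patterns (not the full helper):

import re

options = 'ABCD'
pattern = f'[Tt]he answer is:?\\s+\\(?([{options}])\\)?'
text = 'Let me think.\nThe answer is:\n(B) because ...'
match = re.search(pattern, text, re.DOTALL)
print(match.group(1))  # B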