Unverified commit 62dbf047, authored by Fengzhe Zhou, committed by GitHub

[Sync] update github workflow (#1156)

parent aa2dd2b5
from opencompass.models import TurboMindModel
settings = [
('qwen-1.8b-turbomind', 'Qwen/Qwen-1_8B', 1),
('qwen-7b-turbomind', 'Qwen/Qwen-7B', 1),
('qwen-14b-turbomind', 'Qwen/Qwen-14B', 1),
('qwen-72b-turbomind', 'Qwen/Qwen-72B', 4),
]
models = []
for abbr, path, num_gpus in settings:
models.append(
dict(
type=TurboMindModel,
abbr=abbr,
path=path,
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
max_out_len=1024,
max_seq_len=2048,
batch_size=16,
concurrency=16,
run_cfg=dict(num_gpus=num_gpus),
)
)
from opencompass.models import LmdeployPytorchModel
settings = [
('yi-6b-pytorch', '01-ai/Yi-6B', 1),
('yi-34b-pytorch', '01-ai/Yi-34B', 2),
]
models = []
for abbr, path, num_gpus in settings:
models.append(
dict(
type=LmdeployPytorchModel,
abbr=abbr,
path=path,
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
max_out_len=1024,
max_seq_len=2048,
batch_size=16,
concurrency=16,
run_cfg=dict(num_gpus=num_gpus),
)
)
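These generated model lists are normally pulled into a top-level OpenCompass config and paired with datasets. A minimal sketch, assuming hypothetical module paths (the actual config filenames are not shown in this diff):

from mmengine.config import read_base

with read_base():
    # module paths below are assumptions for illustration only
    from .models.qwen.lmdeploy_qwen_7b import models as turbomind_qwen
    from .models.yi.pytorch_yi_6b import models as pytorch_yi
    from .datasets.gsm8k.gsm8k_gen import gsm8k_datasets

datasets = [*gsm8k_datasets]
models = [*turbomind_qwen, *pytorch_yi]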
@@ -2,7 +2,7 @@
 from mmengine.config import read_base
 with read_base():
-    from .groups.cibench import cibench_summary_groups
+    from .groups.legacy.cibench import cibench_summary_groups
     from .groups.plugineval import plugineval_summary_groups
...
_cibench = ['Pandas', 'Matplotlib', 'Opencv', 'SciPy', 'Seaborn', 'PyTorch']
_cibench = ['cibench_' + i for i in _cibench]
cibench_summary_groups = [{'name': 'cibench', 'subsets': _cibench}]
_cibench_template = ['lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
'scipy', 'seaborn', 'sklearn', 'tensorflow']
_cibench_template = ['cibench_template/' + i for i in _cibench_template]
# number of total exec questions in this module
_cibench_template_weight = {
'lightgbm': [30, 15, 0, 0],
'matplotlib': [42, 0, 0, 36],
'nltk': [70, 30, 20, 10],
'opencv': [60, 10, 0, 40],
'pandas': [60, 40, 0, 10],
'pytorch': [28, 0, 0, 0],
'scipy': [60, 40, 0, 0],
'seaborn': [42, 0, 0, 35],
'sklearn': [42, 6, 0, 18],
'tensorflow': [36, 6, 0, 12],
}
cibench_summary_groups.extend([
{
'name': 'cibench_template:executable',
'subsets': [[i, 'executable'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[0] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[1] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:text_score',
'subsets': [[i, 'text_score'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[2] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template],
'weights': {'cibench_template/' + k : v[3] for k,v in _cibench_template_weight.items()},
},
])
## chinese
_cibench_template_cn = ['lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
'scipy', 'seaborn', 'sklearn', 'tensorflow']
_cibench_template_cn = ['cibench_template_chinese/' + i for i in _cibench_template_cn]
cibench_summary_groups.extend([
{
'name': 'cibench_template_cn:executable',
'subsets': [[i, 'executable'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[0] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[1] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:text_score',
'subsets': [[i, 'text_score'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[2] for k,v in _cibench_template_weight.items()},
},
{
'name': 'cibench_template_cn:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template_cn],
'weights': {'cibench_template_chinese/' + k : v[3] for k,v in _cibench_template_weight.items()},
},
])
## add more without nltk
cibench_summary_groups.extend([
{
'name': 'cibench_template_wo_nltk:executable',
'subsets': [[i, 'executable'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[0] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_wo_nltk:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[1] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_wo_nltk:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template if 'nltk' not in i],
'weights': {'cibench_template/' + k : v[3] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
])
cibench_summary_groups.extend([
{
'name': 'cibench_template_cn_wo_nltk:executable',
'subsets': [[i, 'executable'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[0] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_cn_wo_nltk:numeric_correct',
'subsets': [[i, 'numeric_correct'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[1] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
{
'name': 'cibench_template_cn_wo_nltk:vis_sim',
'subsets': [[i, 'vis_sim'] for i in _cibench_template_cn if 'nltk' not in i],
'weights': {'cibench_template_chinese/' + k : v[3] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
},
])
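The weights above give each library's contribution to a weighted group score (roughly the number of questions per scoring mode). A minimal standalone sketch of such weighted aggregation, not the OpenCompass summarizer itself; the scores below are made up:

def weighted_group_score(scores: dict, weights: dict) -> float:
    # Weighted mean over the subsets listed in `weights`.
    total = sum(weights.values())
    if total == 0:
        return 0.0
    return sum(scores.get(name, 0.0) * w for name, w in weights.items()) / total

scores = {'cibench_template/pandas': 75.0, 'cibench_template/pytorch': 60.0}
weights = {'cibench_template/pandas': 60, 'cibench_template/pytorch': 28}
print(round(weighted_group_score(scores, weights), 2))  # 70.23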
@@ -170,6 +170,8 @@ def parse_dlc_args(dlc_parser):
                         type=str)
 def parse_hf_args(hf_parser):
     """These args are all for the quick construction of HuggingFace models."""
     hf_parser.add_argument('--hf-type', type=str, choices=['base', 'chat'], default='chat', help='The type of the HuggingFace model, base or chat')
@@ -212,7 +214,7 @@ def main():
     if args.work_dir is not None:
         cfg['work_dir'] = args.work_dir
     else:
-        cfg.setdefault('work_dir', osp.join('outputs', 'default'))
+        cfg.setdefault('work_dir', os.path.join('outputs', 'default'))
     # cfg_time_str defaults to the current time
     cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S')
@@ -340,5 +342,6 @@ def main():
     summarizer.summarize(time_str=cfg_time_str)
 if __name__ == '__main__':
     main()
@@ -7,7 +7,8 @@ from .base import BaseModel, LMTemplateParser  # noqa: F401
 from .base_api import APITemplateParser, BaseAPIModel  # noqa: F401
 from .bytedance_api import ByteDance  # noqa: F401
 from .claude_api import Claude  # noqa: F401
-from .gemini_api import Gemini, GeminiAllesAPIN  # noqa: F401
+from .deepseek_api import DeepseekAPI  # noqa: F401
+from .gemini_api import Gemini  # noqa: F401
 from .glm import GLM130B  # noqa: F401
 from .huggingface import HuggingFace  # noqa: F401
 from .huggingface import HuggingFaceCausalLM  # noqa: F401
@@ -21,7 +22,7 @@ from .lightllm_api import LightllmAPI  # noqa: F401
 from .llama2 import Llama2, Llama2Chat  # noqa: F401
 from .lmdeploy_pytorch import LmdeployPytorchModel  # noqa: F401
 from .lmdeploy_tis import LmdeployTisModel  # noqa: F401
-from .minimax_api import MiniMax  # noqa: F401
+from .minimax_api import MiniMax, MiniMaxChatCompletionV2  # noqa: F401
 from .mistral_api import Mistral  # noqa: F401
 from .mixtral import Mixtral  # noqa: F401
 from .modelscope import ModelScope, ModelScopeCausalLM  # noqa: F401
@@ -31,11 +32,12 @@ from .openai_api import OpenAI  # noqa: F401
 from .pangu_api import PanGu  # noqa: F401
 from .qwen_api import Qwen  # noqa: F401
 from .sensetime_api import SenseTime  # noqa: F401
+from .stepfun_api import StepFun  # noqa: F401
 from .turbomind import TurboMindModel  # noqa: F401
 from .turbomind_tis import TurboMindTisModel  # noqa: F401
 from .unigpt_api import UniGPT  # noqa: F401
 from .vllm import VLLM  # noqa: F401
-from .xunfei_api import XunFei  # noqa: F401
+from .xunfei_api import XunFei, XunFeiSpark  # noqa: F401
 from .yayi_api import Yayi  # noqa: F401
 from .zhipuai_api import ZhiPuAI  # noqa: F401
 from .zhipuai_v2_api import ZhiPuV2AI  # noqa: F401
-import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union
@@ -141,29 +140,32 @@ class AI360GPT(BaseAPIModel):
                 self.wait()
                 continue
             if raw_response.status_code == 200:
-                try:
-                    msg = response['choices'][0]['message']['content'].strip()
-                    return msg
-                except KeyError:
-                    if 'error' in response:
-                        # tpm (token per minute) limit
-                        if response['erro']['code'] == '1005':
-                            time.sleep(1)
-                            continue
-                        self.logger.error('Find error message in response: ',
-                                          str(response['error']))
+                msg = response['choices'][0]['message']['content'].strip()
+                self.logger.debug(f'Generated: {msg}')
+                return msg
             # sensitive content, prompt overlength, network error
             # or illegal prompt
-            if (raw_response.status_code == 400
-                    or raw_response.status_code == 401
-                    or raw_response.status_code == 402
-                    or raw_response.status_code == 429
-                    or raw_response.status_code == 500):
-                print(raw_response.text)
-                continue
+            if raw_response.status_code in [400, 401, 402, 429, 500]:
+                if 'error' not in response:
+                    print(raw_response.status_code)
+                    print(raw_response.text)
+                    continue
+                print(response)
+                # tpm (token per minute) limit
+                if response['error']['code'] == '1005':
+                    self.logger.debug('tpm limit, ignoring')
+                    continue
+                elif response['error']['code'] == '1001':
+                    msg = '参数错误:messages参数过长或max_tokens参数值过大'
+                    self.logger.debug(f'Generated: {msg}')
+                    return msg
+                else:
+                    print(response)
+                    self.logger.error('Find error message in response: ',
+                                      str(response['error']))
             print(raw_response)
             max_num_retries += 1
...
@@ -145,8 +145,8 @@ class BaiChuan(BaseAPIModel):
                 self.wait()
                 continue
             if raw_response.status_code == 200:
                 msg = response['choices'][0]['message']['content']
+                self.logger.debug(f'Generated: {msg}')
                 return msg
             if raw_response.status_code != 200:
...
@@ -53,6 +53,8 @@ class ERNIEBot(BaseAPIModel):
         self.headers = {'Content_Type': 'application/json'}
         self.secretkey = secretkey
         self.key = key
+        if not url.endswith('?access_token='):
+            url += '?access_token='
         self.url = url
         access_token, _ = self._generate_access_token()
         self.access_token = access_token
@@ -143,14 +145,25 @@ class ERNIEBot(BaseAPIModel):
             messages = [{'role': 'user', 'content': input}]
         else:
             messages = []
+            msg_buffer, last_role = [], None
             for item in input:
-                msg = {'content': item['prompt']}
-                if item['role'] == 'HUMAN':
-                    msg['role'] = 'user'
-                elif item['role'] == 'BOT':
-                    msg['role'] = 'assistant'
-                messages.append(msg)
+                if item['role'] == 'BOT':
+                    role = 'assistant'
+                else:  # USER or SYSTEM
+                    role = 'user'
+                if role != last_role and last_role is not None:
+                    messages.append({
+                        'content': '\n'.join(msg_buffer),
+                        'role': last_role
+                    })
+                    msg_buffer = []
+                msg_buffer.append(item['prompt'])
+                last_role = role
+            messages.append({
+                'content': '\n'.join(msg_buffer),
+                'role': last_role
+            })
         data = {'messages': messages}
         data.update(self.generation_kwargs)
@@ -181,6 +194,7 @@ class ERNIEBot(BaseAPIModel):
             if raw_response.status_code == 200:
                 try:
                     msg = response['result']
+                    self.logger.debug(msg)
                     return msg
                 except KeyError:
                     print(response)
@@ -188,9 +202,12 @@ class ERNIEBot(BaseAPIModel):
                 if response['error_code'] == 336007:
                     # exceed max length
                     return ''
-                time.sleep(1)
-                continue
+                elif response['error_code'] == 336103:
+                    # prompt tokens too long
+                    return ''
+                else:
+                    time.sleep(1)
+                    continue
             if (response['error_code'] == 110 or response['error_code'] == 100
                     or response['error_code'] == 111
...
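The ERNIEBot change above merges consecutive prompts that map to the same API role, so the payload strictly alternates user and assistant turns. A standalone sketch of that merging logic (not the class itself):

def merge_roles(items):
    # items are OpenCompass-style dicts with 'role' and 'prompt' keys
    messages, msg_buffer, last_role = [], [], None
    for item in items:
        role = 'assistant' if item['role'] == 'BOT' else 'user'
        if role != last_role and last_role is not None:
            messages.append({'role': last_role, 'content': '\n'.join(msg_buffer)})
            msg_buffer = []
        msg_buffer.append(item['prompt'])
        last_role = role
    messages.append({'role': last_role, 'content': '\n'.join(msg_buffer)})
    return messages

print(merge_roles([
    {'role': 'SYSTEM', 'prompt': 'You are helpful.'},
    {'role': 'HUMAN', 'prompt': 'Hi'},
    {'role': 'BOT', 'prompt': 'Hello!'},
]))
# [{'role': 'user', 'content': 'You are helpful.\nHi'},
#  {'role': 'assistant', 'content': 'Hello!'}]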
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
class DeepseekAPI(BaseAPIModel):
"""Model wrapper around DeepseekAPI.
Documentation:
Args:
path (str): The name of the DeepSeek model.
e.g. `deepseek-chat`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
system_prompt: str = '',
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
self.system_prompt = system_prompt
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
if self.system_prompt:
system = {'role': 'system', 'content': self.system_prompt}
messages.insert(0, system)
data = {'model': self.model, 'messages': messages}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
try:
response = raw_response.json()
except Exception as err:
print('Response Error:{}'.format(err))
response = None
self.release()
if response is None:
print('Connection error, reconnect.')
# if a connection error occurs, frequent requests will cause
# a continuously unstable network, so wait here
# to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
# msg = json.load(response.text)
# response
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
if raw_response.status_code == 401:
print('请求被拒绝 api_key错误')
continue
elif raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)
msg = 'The request was rejected because high risk'
return msg
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
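A hypothetical model entry using the new DeepseekAPI wrapper; the abbreviation, model name, and endpoint URL below are assumptions, not taken from this commit:

from opencompass.models import DeepseekAPI

models = [
    dict(
        type=DeepseekAPI,
        abbr='deepseek-chat-api',                             # assumed abbr
        path='deepseek-chat',                                 # assumed model name
        key='YOUR_API_KEY',
        url='https://api.deepseek.com/v1/chat/completions',   # assumed endpoint
        query_per_second=2,
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
    ),
]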
@@ -186,66 +186,3 @@ class Gemini(BaseAPIModel):
                 time.sleep(1)
         raise RuntimeError('API call failed.')
class GeminiAllesAPIN(Gemini):
"""Model wrapper around Gemini models.
Documentation:
Args:
path (str): The name of Gemini model.
e.g. `gemini-pro`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retires if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
temperature: float = 1.0,
top_p: float = 0.8,
top_k: float = 10.0,
):
super().__init__(key=key,
path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
# Replace the url and headers into AllesApin
self.url = url
self.headers = {
'alles-apin-token': key,
'content-type': 'application/json',
}
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
return super().generate(inputs, max_out_len)
@@ -289,13 +289,13 @@ class HuggingFace(BaseModel):
         tokens = self.tokenizer.batch_encode_plus(inputs,
                                                   padding=True,
                                                   truncation=True,
-                                                  max_length=self.max_seq_len -
-                                                  max_out_len)
+                                                  max_length=self.max_seq_len)
         tokens = {
             k: torch.tensor(np.array(tokens[k]), device=self.model.device)
             for k in tokens if k in ['input_ids', 'attention_mask']
         }
+        origin_stopping_criteria = stopping_criteria
         if stopping_criteria:
             # Construct huggingface stopping criteria
             if self.tokenizer.eos_token is not None:
@@ -332,6 +332,9 @@ class HuggingFace(BaseModel):
         if self.end_str:
             decodeds = [token.split(self.end_str)[0] for token in decodeds]
+        if origin_stopping_criteria:
+            for t in origin_stopping_criteria:
+                decodeds = [token.split(t)[0] for token in decodeds]
         return decodeds
     def _single_generate(self,
@@ -382,6 +385,7 @@ class HuggingFace(BaseModel):
                                              max_length=self.max_seq_len -
                                              max_out_len)['input_ids']
         input_ids = torch.tensor(input_ids, device=self.model.device)
+        origin_stopping_criteria = stopping_criteria
         if stopping_criteria:
             # Construct huggingface stopping criteria
             if self.tokenizer.eos_token is not None:
@@ -419,6 +423,9 @@ class HuggingFace(BaseModel):
         if self.end_str:
             decodeds = [token.split(self.end_str)[0] for token in decodeds]
+        if origin_stopping_criteria:
+            for t in origin_stopping_criteria:
+                decodeds = [token.split(t)[0] for token in decodeds]
         return decodeds
     def get_logits(self, inputs: List[str]):
...
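The added lines post-process decoded text by cutting it off at any of the caller-supplied stop strings, in addition to the existing end_str handling. A standalone sketch of that truncation:

def truncate_at_stops(decodeds, stop_strings):
    # Keep only the text before the first occurrence of each stop string.
    for s in stop_strings:
        decodeds = [text.split(s)[0] for text in decodeds]
    return decodeds

print(truncate_at_stops(['Answer: 42\n\nQuestion: next one'], ['\n\nQuestion:']))
# ['Answer: 42']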
@@ -180,3 +180,173 @@ class MiniMax(BaseAPIModel):
                 max_num_retries += 1
         raise RuntimeError(response.text)
class MiniMaxChatCompletionV2(BaseAPIModel):
"""Model wrapper around MiniMax ChatCompletionV2.
Documentation:
Args:
path (str): The name of the MiniMax model.
e.g. `abab5.5-chat`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
data = {
'model': self.model,
'messages': messages,
'max_tokens': max_out_len
}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
response = raw_response.json()
self.release()
if response is None:
print('Connection error, reconnect.')
# if a connection error occurs, frequent requests will cause
# a continuously unstable network, so wait here
# to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
try:
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
except Exception:
code = response.get('base_resp', {}).get('status_code')
if code == 1002:
# rate limit
time.sleep(1)
continue
elif code == 1027:
return 'The request was rejected because high risk'
print(messages, response)
pass
elif raw_response.status_code == 401:
print('请求被拒绝 api_key错误')
continue
elif raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)
msg = 'The request was rejected because high risk'
return msg
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
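For a single-string prompt, _generate above posts a payload of this shape; the model name is an assumption for illustration:

data = {
    'model': 'abab5.5-chat',   # assumed MiniMax model name
    'messages': [{'role': 'user', 'content': 'Explain beam search in one sentence.'}],
    'max_tokens': 512,
}
# sent as: requests.request('POST', url=self.url, headers=self.headers, json=data)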
@@ -152,8 +152,7 @@ class Qwen(BaseAPIModel):
             if response.status_code == 200:
                 try:
                     msg = response.output.text
-                    print('=' * 128)
-                    print(msg)
+                    self.logger.debug(msg)
                     return msg
                 except KeyError:
                     print(response)
...
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
PromptType = Union[PromptList, str]
class StepFun(BaseAPIModel):
"""Model wrapper around StepFun.
Documentation:
Args:
path (str): The name of the StepFun model.
e.g. `step-1-32k`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
key: str,
url: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
system_prompt: str = '',
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
self.headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + key,
}
self.url = url
self.model = path
self.system_prompt = system_prompt
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
"""Generate results given a list of inputs.
Args:
inputs (List[PromptType]): A list of strings or PromptDicts.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
List[str]: A list of generated strings.
"""
with ThreadPoolExecutor() as executor:
results = list(
executor.map(self._generate, inputs,
[max_out_len] * len(inputs)))
self.flush()
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
"""Generate results given an input.
Args:
inputs (PromptType): A string or PromptDict.
The PromptDict should be organized in OpenCompass'
API format.
max_out_len (int): The maximum length of the output.
Returns:
str: The generated string.
"""
assert isinstance(input, (str, PromptList))
if isinstance(input, str):
messages = [{'role': 'user', 'content': input}]
else:
messages = []
msg_buffer, last_role = [], None
for item in input:
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
if item['role'] != last_role and last_role is not None:
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = item['role']
messages.append({
'content': '\n'.join(msg_buffer),
'role': last_role
})
if self.system_prompt:
system = {'role': 'system', 'content': self.system_prompt}
messages.insert(0, system)
data = {'model': self.model, 'messages': messages}
max_num_retries = 0
while max_num_retries < self.retry:
self.acquire()
try:
raw_response = requests.request('POST',
url=self.url,
headers=self.headers,
json=data)
except Exception as err:
print('Request Error:{}'.format(err))
time.sleep(2)
continue
try:
response = raw_response.json()
except Exception:
response = None
self.release()
if response is None:
print('Connection error, reconnect.')
# if a connection error occurs, frequent requests will cause
# a continuously unstable network, so wait here
# to slow down the requests
self.wait()
continue
if raw_response.status_code == 200:
# msg = json.load(response.text)
# response
msg = response['choices'][0]['message']['content']
self.logger.debug(f'Generated: {msg}')
return msg
if raw_response.status_code == 400:
print(messages, response)
print('请求失败,状态码:', raw_response)
msg = 'The context length exceeded'
return msg
elif raw_response.status_code == 403:
print('请求被拒绝 api_key错误')
continue
elif raw_response.status_code == 429:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(5)
continue
elif raw_response.status_code == 451:
print(messages, response)
print('请求失败,状态码:', raw_response)
msg = 'The request was rejected because high risk'
return msg
else:
print(messages, response)
print('请求失败,状态码:', raw_response)
time.sleep(1)
max_num_retries += 1
raise RuntimeError(raw_response)
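When system_prompt is set, the wrapper prepends a system turn before sending; a small sketch with illustrative values:

system_prompt = 'Reply concisely in English.'
messages = [{'role': 'user', 'content': 'Summarize the passage above.'}]
if system_prompt:
    messages.insert(0, {'role': 'system', 'content': system_prompt})
# messages now starts with the system turn, followed by the merged user turns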
@@ -55,9 +55,6 @@ class TurboMindModel(BaseModel):
         if engine_config is not None:
             from lmdeploy.messages import TurbomindEngineConfig
             engine_config = TurbomindEngineConfig(**engine_config)
-        if gen_config is not None:
-            from lmdeploy.messages import EngineGenerationConfig
-            gen_config = EngineGenerationConfig(**gen_config)
         self.logger = get_logger()
         tm_model = TurboMind.from_pretrained(path, engine_config=engine_config)
         self.tokenizer = tm_model.tokenizer
@@ -106,6 +103,7 @@ class TurboMindModel(BaseModel):
                 t = self.tokenizer.encode(t, add_bos=False)
                 stop_words.append(t[0])
             gen_config['stop_words'] = list(set(stop_words))
+            gen_config.setdefault('min_new_tokens', 1)
             from lmdeploy.messages import EngineGenerationConfig
             gen_config = EngineGenerationConfig(**gen_config)
@@ -123,6 +121,9 @@ class TurboMindModel(BaseModel):
                     [gen_config] * len(batch_input),
                 ))
             results += _results
+        if stopping_criteria:
+            for s in stopping_criteria:
+                results = [r.split(s)[0] for r in results]
         return results
     def get_token_len(self, prompt: str) -> int:
...
 import json
+import re
+import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union
@@ -221,3 +223,150 @@ class XunFei(BaseAPIModel):
             if err_code == 10013:
                 return err_data['header']['message']
             raise RuntimeError(f'Code: {err_code}, data: {err_data}')
class XunFeiSpark(BaseAPIModel):
"""Model wrapper around XunFeiSpark.
Documentation:
Args:
path (str): The Spark domain to request (passed through as
`spark_llm_domain`), e.g. `generalv3.5`
key (str): Authorization key.
query_per_second (int): The maximum queries allowed per second
between two consecutive calls of the API. Defaults to 1.
max_seq_len (int): Unused here.
meta_template (Dict, optional): The model's meta prompt
template if needed, in case the requirement of injecting or
wrapping of any meta instructions.
retry (int): Number of retries if the API call fails. Defaults to 2.
"""
def __init__(
self,
path: str,
url: str,
app_id: str,
api_key: str,
api_secret: str,
query_per_second: int = 2,
max_seq_len: int = 2048,
meta_template: Optional[Dict] = None,
retry: int = 2,
):
super().__init__(path=path,
max_seq_len=max_seq_len,
query_per_second=query_per_second,
meta_template=meta_template,
retry=retry)
try:
from sparkai.llm.llm import ChatSparkLLM # noqa: F401
except ImportError:
raise ImportError('run `pip install --upgrade spark_ai_python`')
self.spark_domain = path
self.url = url
self.app_id = app_id
self.api_key = api_key
self.api_secret = api_secret
def generate(
self,
inputs: List[PromptType],
max_out_len: int = 512,
) -> List[str]:
results = [self._generate(input, max_out_len) for input in inputs]
return results
def _generate(
self,
input: PromptType,
max_out_len: int = 512,
) -> str:
assert isinstance(input, (str, PromptList))
from sparkai.core.messages import ChatMessage
from sparkai.llm.llm import ChatSparkLLM
if isinstance(input, str):
messages = [ChatMessage(role='user', content=input)]
else:
messages = []
msg_buffer, last_role = [], None
for index, item in enumerate(input):
if index == 0 and item['role'] == 'SYSTEM':
role = 'system'
elif item['role'] == 'BOT':
role = 'assistant'
else:
role = 'user'
if role != last_role and last_role is not None:
content = '\n'.join(msg_buffer)
messages.append(
ChatMessage(role=last_role, content=content))
msg_buffer = []
msg_buffer.append(item['prompt'])
last_role = role
content = '\n'.join(msg_buffer)
messages.append(ChatMessage(role=last_role, content=content))
spark = ChatSparkLLM(
spark_api_url=self.url,
spark_app_id=self.app_id,
spark_api_key=self.api_key,
spark_api_secret=self.api_secret,
spark_llm_domain=self.spark_domain,
streaming=False,
max_tokens=max_out_len,
)
all_empty_response = True
for _ in range(self.retry + 1):
try:
outputs = spark.generate([messages]).generations[0]
if len(outputs) == 0:
self.logger.error('Empty response, retrying...')
continue
msg = outputs[0].text
self.logger.debug(f'Generated: {msg}')
return msg
except ConnectionError as e:
match = re.match(r'Error Code: (\d+), Error: (.*)',
e.args[0],
flags=re.DOTALL)
if match:
error_code = int(match.group(1))
msg = match.group(2)
if error_code == 10003: # query data exceed limit
self.logger.error(f'Error {error_code}: {msg}')
return msg
elif error_code in [10013, 10014]: # skip safety problem
self.logger.debug(f'Generated: {msg}')
return msg
elif error_code == 10020: # plugin result is empty
self.logger.error(f'Error {error_code}: {msg}')
return msg
elif error_code == 11202: # qps limit
time.sleep(1)
else:
self.logger.error(f'Error {error_code}: {msg}')
raise e
raise e
except TimeoutError:
self.logger.error('TimeoutError, sleep 60, retrying...')
time.sleep(60)
except Exception as e:
self.logger.error(str(e))
pass
all_empty_response = False
if all_empty_response:
self.logger.error('All empty response')
return 'all empty response'
raise RuntimeError('Failed to generate response')
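Spark SDK connection errors are dispatched by parsing the error string with a regex, as in the handler above. A standalone sketch; the error string below is fabricated for illustration:

import re

err = 'Error Code: 11202, Error: qps limit exceeded'
match = re.match(r'Error Code: (\d+), Error: (.*)', err, flags=re.DOTALL)
if match:
    error_code, msg = int(match.group(1)), match.group(2)
    print(error_code, msg)  # 11202 qps limit exceeded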
@@ -141,7 +141,7 @@ class DLCRunner(BaseRunner):
             hf_offline = self.aliyun_cfg.get('hf_offline', True)
             if hf_offline:
-                shell_cmd += 'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; '  # noqa: E501
+                shell_cmd += 'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; export HF_HUB_OFFLINE=1; '  # noqa: E501
             http_proxy = self.aliyun_cfg.get('http_proxy')
             if http_proxy is not None:
@@ -158,6 +158,7 @@ class DLCRunner(BaseRunner):
                 shell_cmd += f'export {extra_env}; '
             shell_cmd += f'cd {pwd}; '
+            shell_cmd += 'umask 0000; '
             shell_cmd += '{task_cmd}'
             tmpl = ('dlc create job'
@@ -195,7 +196,10 @@ class DLCRunner(BaseRunner):
         index_to_start = 0
         while index_to_start < num_retry_to_start:
             index_to_start += 1
-            output = subprocess.getoutput(cmd)
+            try:
+                output = subprocess.getoutput(cmd)
+            except BlockingIOError:
+                output = ''
             match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
             if match is None:
                 stdout.write('Failed to get job id from output:')
@@ -264,7 +268,10 @@ class DLCRunner(BaseRunner):
                     f" -c {self.aliyun_cfg['dlc_config_path']}"
                     f' --start_time {pri_time}'
                     f' --end_time {cur_time}')
-                log_output = subprocess.getoutput(logs_cmd)
+                try:
+                    log_output = subprocess.getoutput(logs_cmd)
+                except BlockingIOError:
+                    log_output = '[WARN] No logs found for the pod'
                 if '[WARN] No logs found for the pod' not in log_output:
                     pri_time = cur_time
...
@@ -46,17 +46,19 @@ class LocalRunner(BaseRunner):
         lark_bot_url (str): Lark bot url.
     """
-    def __init__(
-        self,
-        task: ConfigDict,
-        max_num_workers: int = 16,
-        debug: bool = False,
-        max_workers_per_gpu: int = 1,
-        lark_bot_url: str = None,
-    ):
+    def __init__(self,
+                 task: ConfigDict,
+                 max_num_workers: int = 16,
+                 debug: bool = False,
+                 max_workers_per_gpu: int = 1,
+                 lark_bot_url: str = None,
+                 **kwargs):
         super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
         self.max_num_workers = max_num_workers
         self.max_workers_per_gpu = max_workers_per_gpu
+        logger = get_logger()
+        for k, v in kwargs.items():
+            logger.warning(f'Ignored argument in {self.__module__}: {k}={v}')
     def launch(self, tasks: List[Dict[str, Any]]) -> List[Tuple[str, int]]:
         """Launch multiple tasks.
...
@@ -94,11 +94,11 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
         f'答案是\s?(\S+)(?:。|$)',
         f'答案应该是\s?(\S+)(?:。|$)',
         f'答案为\s?(\S+)(?:。|$)',
-        f'[Tt]he answer is \(?([{options}])\)?',
-        f'[Tt]he answer is option \(?([{options}])\)?',
-        f'[Tt]he correct answer is \(?([{options}])\)?',
-        f'[Tt]he correct answer is option \(?([{options}])\)?',
-        f'[Tt]he answer to the question is \(?([{options}])\)?',
+        f'[Tt]he answer is:?\s+\(?([{options}])\)?',
+        f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
+        f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
+        f'[Tt]he correct answer is option:?\s+\(?([{options}])\)?',
+        f'[Tt]he answer to the question is:?\s+\(?([{options}])\)?',
         f'^选项\s?([{options}])',
         f'^([{options}])\s?选?项',
         f'(\s|^)[{options}][\s。,,::\.$]',
@@ -116,7 +116,7 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
     if cushion:
         patterns.extend(cushion_patterns)
     for pattern in patterns:
-        match = re.search(pattern, text)
+        match = re.search(pattern, text, re.DOTALL)
         if match:
             outputs = match.group(0)
             for i in options:
...
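The loosened patterns now tolerate an optional colon and any whitespace (including newlines) between the phrase and the option, and re.DOTALL lets patterns span lines. A simplified check, assuming options 'ABCD' (the real function goes on to scan the matched span for the option letter):

import re

options = 'ABCD'
pattern = rf'[Tt]he answer is:?\s+\(?([{options}])\)?'
text = 'Let me think.\nThe answer is:\n(B) because ...'
match = re.search(pattern, text, re.DOTALL)
print(match.group(1) if match else None)  # B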