Commit bc5ebf0f authored by luopl

Initial commit

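vlmeval/__init__.py (the package root, inferred from the relative imports below):
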
try:
    import torch
except ImportError:
    pass

from .smp import *
from .api import *
from .dataset import *
from .utils import *
from .vlm import *
from .config import *
from .tools import cli

load_env()

__version__ = '0.2rc1'
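
vlmeval/api/__init__.py (inferred: these are the wrappers re-exported above via `from .api import *`):
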
from .gpt import OpenAIWrapper, GPT4V
from .hf_chat_model import HFChatModel
from .gemini import GeminiWrapper, GeminiProVision
from .qwen_vl_api import QwenVLWrapper, QwenVLAPI, Qwen2VLAPI
from .qwen_api import QwenAPI
from .claude import Claude_Wrapper, Claude3V
from .reka import Reka
from .glm_vision import GLMVisionAPI
from .cloudwalk import CWWrapper
from .sensechat_vision import SenseChatVisionAPI
from .siliconflow import SiliconFlowAPI, TeleMMAPI
from .hunyuan import HunyuanVision
from .bailingmm import bailingMMAPI
from .bluelm_v_api import BlueLMWrapper, BlueLM_V_API
from .jt_vl_chat import JTVLChatAPI
from .taiyi import TaiyiAPI
from .lmdeploy import LMDeployAPI
from .taichu import TaichuVLAPI

__all__ = [
    'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V',
    'GeminiProVision', 'QwenVLWrapper', 'QwenVLAPI', 'QwenAPI',
    'Claude3V', 'Claude_Wrapper', 'Reka', 'GLMVisionAPI',
    'CWWrapper', 'SenseChatVisionAPI', 'HunyuanVision', 'Qwen2VLAPI',
    'BlueLMWrapper', 'BlueLM_V_API', 'JTVLChatAPI', 'bailingMMAPI',
    'TaiyiAPI', 'TeleMMAPI', 'SiliconFlowAPI', 'LMDeployAPI',
    'TaichuVLAPI'
]
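
vlmeval/api/bailingmm.py (named by the `from .bailingmm import bailingMMAPI` line above):
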
import base64
import time

from vlmeval.smp import *
from vlmeval.api.base import BaseAPI
from vlmeval.dataset import DATASET_TYPE
from vlmeval.smp.vlm import encode_image_file_to_base64


class bailingMMWrapper(BaseAPI):

    is_api: bool = True

    def __init__(self,
                 model: str,
                 retry: int = 5,
                 wait: int = 5,
                 key: str = None,
                 verbose: bool = True,
                 system_prompt: str = None,
                 max_tokens: int = 1024,
                 proxy: str = None,
                 **kwargs):
        self.model = model
        self.fail_msg = 'Failed to obtain answer via bailingMM API.'
        # Fall back to the environment variable when no key is passed in.
        if key is None:
            key = os.environ.get('BAILINGMM_API_KEY', None)
        assert key is not None, 'Please set the API Key for bailingMM.'
        self.key = key
        self.headers = {"Content-Type": "application/json"}
        super().__init__(wait=wait, retry=retry, system_prompt=system_prompt, verbose=verbose, **kwargs)

    def image_to_base64(self, image_path):
        with open(image_path, 'rb') as image_file:
            encoded_string = str(base64.b64encode(image_file.read()), 'utf-8')
        return encoded_string

    def prepare_inputs(self, inputs):
        msgs = cp.deepcopy(inputs)
        content = []
        for msg in msgs:
            # Text entries pass through unchanged; image entries have their
            # file path replaced by the base64-encoded image bytes.
            if msg['type'] != 'text':
                try:
                    image_data = self.image_to_base64(msg['value'])
                except Exception as e:
                    if self.verbose:
                        self.logger.error(e)
                    image_data = ''
                msg['value'] = image_data
            content.append(msg)
        return content

    def generate_inner(self, inputs, **kwargs):
        assert isinstance(inputs, str) or isinstance(inputs, list)
        start = time.time()
        inputs = [inputs] if isinstance(inputs, str) else inputs
        messages = self.prepare_inputs(inputs)
        service_url = "https://bailingchat.alipay.com/api/proxy/eval/antgmm/completions"
        payload = {
            "structInput": messages,
            "sk": self.key,
            "timeout": 180000
        }
        response = requests.post(service_url, headers=self.headers, json=payload)
        if self.verbose:
            self.logger.info(f'Time for requesting is: {time.time() - start:.2f}s')
        try:
            assert response.status_code == 200
            output = json.loads(response.text)
            answer = output['preds']['pred']
            if self.verbose:
                self.logger.info(f'inputs: {inputs}\nanswer: {answer}')
            return 0, answer, 'Succeeded! '
        except Exception as e:
            if self.verbose:
                self.logger.error(e)
                self.logger.error(f'The input messages are {inputs}.')
            return -1, self.fail_msg, ''


class bailingMMAPI(bailingMMWrapper):

    def generate(self, message, dataset=None):
        return super(bailingMMAPI, self).generate(message, dataset=dataset)
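
A minimal usage sketch for the wrapper above, assuming BAILINGMM_API_KEY is set and the Alipay endpoint is reachable; the model name and image path are placeholders, and BaseAPI is assumed to retry generate_inner and hand back the answer string:

# Hypothetical usage; requires BAILINGMM_API_KEY and network access.
from vlmeval.api import bailingMMAPI

model = bailingMMAPI(model='bailingMM')  # placeholder model name
message = [
    {'type': 'image', 'value': '/path/to/demo.jpg'},
    {'type': 'text', 'value': 'What is shown in the image?'},
]
# generate() is defined above; BaseAPI presumably retries generate_inner()
# until it returns status 0.
answer = model.generate(message)
print(answer)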
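
vlmeval/api/bluelm_v_api.py (named by the `from .bluelm_v_api import BlueLMWrapper, BlueLM_V_API` line above):
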
import base64
import json
import os

import requests

from vlmeval.smp import *
from vlmeval.api.base import BaseAPI


def multimodal(images, text, url, key, temperature=0, max_tokens=1024, history=None):
    # Note: `history` is currently unused by the request payload.
    if images:
        pics = []
        for image in images:
            with open(image, 'rb') as f:
                pic = base64.b64encode(f.read()).decode('utf-8')
            pics.append(pic)
        data = {'images': pics, 'text': text, 'key': key, 'temperature': temperature, 'max_new_tokens': max_tokens}
    else:
        data = {'text': text, 'key': key, 'temperature': temperature, 'max_new_tokens': max_tokens}
    response = requests.post(url, json=data, headers={'Content-Type': 'application/json'})
    response = json.loads(response.text)
    return response
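
The endpoint is presumably expected to reply with JSON carrying the answer under a `result` key; `generate_inner` below reads `response['result']` accordingly.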


class BlueLMWrapper(BaseAPI):

    is_api: bool = True

    def __init__(self,
                 model: str = 'BlueLM-V-v3.0',
                 retry: int = 5,
                 wait: int = 5,
                 verbose: bool = True,
                 temperature: float = 0.0,
                 system_prompt: str = None,
                 max_tokens: int = 1024,
                 key: str = None,
                 url: str = 'http://api-ai.vivo.com.cn/multimodal',
                 **kwargs):
        self.model = model
        self.fail_msg = 'Failed to obtain answer via BlueLM-V API. '
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.url = url
        self.key = key
        if self.key is None:
            self.key = os.environ.get('BLUELM_V_API_KEY', None)
        assert self.key is not None, (
            'Please set the API Key for BlueLM-V '
            '(to obtain one, contact shuai.ren@vivo.com by email).'
        )
        super().__init__(wait=wait, retry=retry, system_prompt=system_prompt, verbose=verbose, **kwargs)

    def message_to_promptimg(self, message, dataset=None):
        # Flatten the interleaved message into one text prompt plus the image
        # path(s); multi-image messages keep '<image>' placeholders in the text.
        num_images = len([x for x in message if x['type'] == 'image'])
        if num_images == 0:
            prompt = '\n'.join([x['value'] for x in message if x['type'] == 'text'])
            image = None
        elif num_images == 1:
            prompt = '\n'.join([x['value'] for x in message if x['type'] == 'text'])
            image = [x['value'] for x in message if x['type'] == 'image']
        else:
            prompt = '\n'.join([x['value'] if x['type'] == 'text' else '<image>' for x in message])
            if dataset == 'BLINK':
                image = concat_images_vlmeval(
                    [x['value'] for x in message if x['type'] == 'image'],
                    target_size=512)
            else:
                image = [x['value'] for x in message if x['type'] == 'image']

        # Dataset-specific prompt rewrites.
        if dataset in ['MMBench_DEV_EN_V11', 'MMBench_DEV_CN_V11', 'MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11',
                       'AI2D_TEST', 'AI2D_TEST_TO_MASK', 'MMMU_DEV_VAL']:
            prompt = prompt.replace('Please select the correct answer from the options above.',
                                    "Answer with the option's letter from the given choices directly.")
        elif dataset in ['ChartQA_TEST']:
            prompt = prompt.replace('Answer the question using a single word or phrase.',
                                    'Answer the question using a single number or phrase.')
        elif dataset in ['DocVQA_VAL', 'DocVQA_TEST']:
            prompt = prompt.replace('Answer the question using a single word or phrase.',
                                    'Give the short answer directly.')
        elif dataset in ['TextVQA_VAL']:
            prompt = prompt.replace('Answer the question using a single word or phrase.',
                                    "When the provided information is insufficient, respond with 'Unanswerable'. "
                                    'Answer the question using a single word or phrase.')
        elif dataset in ['MTVQA_TEST']:
            prompt = prompt.replace('\nAnswer the question using a word or phrase in the language of the question.', '')
        elif dataset in ['MathVista_MINI']:
            if 'Choices:' in prompt:
                prompt = prompt.replace('Choices:', 'Options:').replace('Hint:', 'Context:')
                for i in range(1, 7):  # rewrite option markers (A) ~ (F) as A. ~ F.
                    prompt = prompt.replace(f'({chr(64 + i)})', f'{chr(64 + i)}.')
                prompt += "\nAnswer with the option's letter from the given choices directly."
            else:
                prompt += '\nAnswer the question using a single word or phrase.'
        return prompt, image

    def generate_inner(self, inputs, **kwargs):
        assert isinstance(inputs, str) or isinstance(inputs, list)
        pure_text = np.all([x['type'] == 'text' for x in inputs])
        assert not pure_text, 'BlueLM-V expects at least one image in the message.'
        prompt, image_path = self.message_to_promptimg(inputs, kwargs['dataset'])
        try:
            response = multimodal(image_path, prompt, self.url, self.key, self.temperature, self.max_tokens)
            answer = response['result']
            return 0, answer, 'Succeeded! '
        except Exception as err:
            if self.verbose:
                self.logger.error(f'{type(err)}: {err}')
                self.logger.error(f'The input messages are {inputs}.')
            return -1, self.fail_msg, ''


class BlueLM_V_API(BlueLMWrapper):

    def generate(self, message, dataset=None):
        return super(BlueLM_V_API, self).generate(message, dataset=dataset)
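
A small sketch of how `message_to_promptimg` flattens a multi-image message; the dummy key only satisfies the constructor's assertion (BaseAPI is assumed to construct without network access), and no request is sent:

# Illustrative only; 'dummy-key' is a placeholder and no API call is made.
wrapper = BlueLMWrapper(key='dummy-key')
prompt, image = wrapper.message_to_promptimg([
    {'type': 'image', 'value': 'img1.jpg'},
    {'type': 'image', 'value': 'img2.jpg'},
    {'type': 'text', 'value': 'Which image is brighter?'},
])
# With multiple images and no dataset-specific rewrite, the text keeps
# '<image>' placeholders:
# prompt == '<image>\n<image>\nWhich image is brighter?'
# image  == ['img1.jpg', 'img2.jpg']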

(The remaining file diffs in this commit are collapsed and not shown.)