"sgl-kernel/python/vscode:/vscode.git/clone" did not exist on "3d7f7a43c87f6c8018cda4a0b46217d628d68e63"
Commit d0778a76 authored by wanglch's avatar wanglch
Browse files

Update minicpm-v_version.py

parent dad37c3a
# test.py
import torch
from PIL import Image
from modelscope import AutoModel, AutoTokenizer
# load omni model default, the default init_vision/init_audio/init_tts is True
# if load vision-only model, please set init_audio=False and init_tts=False
# if load audio-only model, please set init_vision=False
model = AutoModel.from_pretrained(
'openbmb/MiniCPM-V-2_6',
trust_remote_code=True,
attn_implementation='sdpa', # sdpa or flash_attention_2
torch_dtype=torch.bfloat16,
init_vision=True,
init_audio=True,
init_tts=True
)
model = AutoModel.from_pretrained('OpenBMB/MiniCPM-V-2_6', trust_remote_code=True,
attn_implementation='sdpa', torch_dtype=torch.bfloat16) # sdpa or flash_attention_2, no eager
model = model.eval().cuda()
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
# In addition to vision-only mode, tts processor and vocos also needs to be initialized
model.init_tts()
tokenizer = AutoTokenizer.from_pretrained('OpenBMB/MiniCPM-V-2_6', trust_remote_code=True)
# test.py
image = Image.open('../images/XXXX.jpg').convert('RGB')
question = 'ocr this image?'
image = Image.open('xx.jpg').convert('RGB')
question = 'What is in the image?'
msgs = [{'role': 'user', 'content': [image, question]}]
res = model.chat(
......@@ -35,3 +19,17 @@ res = model.chat(
)
print(res)
## if you want to use streaming, please make sure sampling=True and stream=True
## the model.chat will return a generator
res = model.chat(
image=None,
msgs=msgs,
tokenizer=tokenizer,
sampling=True,
stream=True
)
generated_text = ""
for new_text in res:
generated_text += new_text
print(new_text, flush=True, end='')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment