Commit 22c8347c authored by chenpangpang

feat: initial commit

parent 07ba1fd9
weights/
*.pt
*.bin
*.safetensors
.*
!.gitignore
__pycache__
transformers.zip
#########
# Build #
#########
FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.2.0-python3.10-cuda12.1-ubuntu22.04 AS base
ARG IMAGE=qwen2-vl-7b-instruct
ARG IMAGE_UPPER=Qwen2-7B-VL-demo
ARG BRANCH=gpu
# Clone the demo repository and install its Python dependencies in the build stage
RUN cd /root && git clone -b $BRANCH http://developer.hpccube.com/codes/chenpangpang/$IMAGE.git
WORKDIR /root/$IMAGE/$IMAGE_UPPER
RUN pip install --no-cache-dir -r requirements.txt
#########
# Prod #
#########
FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.2.0-python3.10-cuda12.1-ubuntu22.04
ARG IMAGE=qwen2-vl-7b-instruct
ARG IMAGE_UPPER=Qwen2-7B-VL-demo
# Gradio's share-link helper binary must be present and executable
COPY chenyh/$IMAGE/frpc_linux_amd64_v0.2 /opt/conda/lib/python3.10/site-packages/gradio/
RUN chmod +x /opt/conda/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.2
# Model weights pre-downloaded on the build host
COPY chenyh/$IMAGE/qwen/Qwen2-VL-7B-Instruct /root/$IMAGE_UPPER/qwen/Qwen2-VL-7B-Instruct
# Reuse the installed site-packages and the cloned sources from the build stage
COPY --from=base /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
COPY --from=base /root/$IMAGE/$IMAGE_UPPER /root/$IMAGE_UPPER
COPY --from=base /root/$IMAGE/启动器.ipynb /root/$IMAGE/start.sh /root/
COPY --from=base /root/$IMAGE/assets /root/assets
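
# Typical build/run invocation (illustrative only; the tag name and port mapping
# below are assumptions, not part of this repository):
#   docker build -t qwen2-vl-7b-demo .
#   docker run --gpus all -p 7860:7860 qwen2-vl-7b-demo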
---
# Full documentation: https://modelscope.cn/docs/%E5%88%9B%E7%A9%BA%E9%97%B4%E5%8D%A1%E7%89%87
domain: # one of: cv/nlp/audio/multi-modal/AutoML
# - cv
tags: # custom tags
-
datasets: # associated datasets
  evaluation:
  #- iic/ICDAR13_HCTR_Dataset
  test:
  #- iic/MTWI
  train:
  #- iic/SIBR
models: # associated models
#- iic/ofa_ocr-recognition_general_base_zh
## Entry file (app.py by default for the Gradio/Streamlit SDK; index.html for Static HTML)
# deployspec:
# entry_file: app.py
license: Apache License 2.0
---
#### Clone with HTTP
```bash
git clone https://www.modelscope.cn/studios/qwen/Qwen2-7B-VL-demo.git
```
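
#### Run the demo

A minimal local run, assuming the dependencies from requirements.txt are installed and the Qwen2-VL-7B-Instruct weights are available under `qwen/` (the layout used by start.sh and download.sh):

```bash
cd Qwen2-7B-VL-demo
pip install -r requirements.txt
python app.py --server-name 0.0.0.0 --server-port 7860
```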
# Copyright (c) Alibaba Cloud.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import copy
import re
import os
from argparse import ArgumentParser
from threading import Thread
import gradio as gr
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration, TextIteratorStreamer
from modelscope import snapshot_download
# from modelscope.hub.api import HubApi
# api = HubApi()
# api.login(os.getenv('MS_SDK_TOKEN'))
# DEFAULT_CKPT_PATH = snapshot_download('qwen/Qwen2-VL-7B-Instruct')
DEFAULT_CKPT_PATH = 'qwen/Qwen2-VL-7B-Instruct'
def _get_args():
    parser = ArgumentParser()
    parser.add_argument('-c',
                        '--checkpoint-path',
                        type=str,
                        default=DEFAULT_CKPT_PATH,
                        help='Checkpoint name or path, default to %(default)r')
    parser.add_argument('--cpu-only', action='store_true', help='Run demo with CPU only')
    parser.add_argument('--share',
                        action='store_true',
                        default=True,
                        help='Create a publicly shareable link for the interface.')
    parser.add_argument('--inbrowser',
                        action='store_true',
                        default=False,
                        help='Automatically launch the interface in a new tab on the default browser.')
    parser.add_argument('--server-port', type=int, default=7860, help='Demo server port.')
    parser.add_argument('--server-name', type=str, default='0.0.0.0', help='Demo server name.')
    args = parser.parse_args()
    return args

def _load_model_processor(args):
    if args.cpu_only:
        device_map = 'cpu'
    else:
        device_map = 'auto'

    # default: Load the model on the available device(s)
    # model = Qwen2VLForConditionalGeneration.from_pretrained(args.checkpoint_path, device_map=device_map)

    # We recommend enabling flash_attention_2 for better acceleration and memory saving,
    # especially in multi-image and video scenarios.
    model = Qwen2VLForConditionalGeneration.from_pretrained(args.checkpoint_path,
                                                            torch_dtype='auto',
                                                            # attn_implementation='flash_attention_2',
                                                            device_map=device_map)

    # Bound the per-image visual token budget to keep memory use predictable
    min_pixels = 256 * 28 * 28
    max_pixels = 1280 * 28 * 28
    processor = AutoProcessor.from_pretrained(args.checkpoint_path, min_pixels=min_pixels, max_pixels=max_pixels)
    return model, processor

def _parse_text(text):
    lines = text.split('\n')
    lines = [line for line in lines if line != '']
    count = 0
    for i, line in enumerate(lines):
        if '```' in line:
            count += 1
            items = line.split('`')
            if count % 2 == 1:
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                lines[i] = '<br></code></pre>'
        else:
            if i > 0:
                if count % 2 == 1:
                    line = line.replace('`', r'\`')
                    line = line.replace('<', '&lt;')
                    line = line.replace('>', '&gt;')
                    line = line.replace(' ', '&nbsp;')
                    line = line.replace('*', '&ast;')
                    line = line.replace('_', '&lowbar;')
                    line = line.replace('-', '&#45;')
                    line = line.replace('.', '&#46;')
                    line = line.replace('!', '&#33;')
                    line = line.replace('(', '&#40;')
                    line = line.replace(')', '&#41;')
                    line = line.replace('$', '&#36;')
                lines[i] = '<br>' + line
    text = ''.join(lines)
    return text

def _remove_image_special(text):
    text = text.replace('<ref>', '').replace('</ref>', '')
    return re.sub(r'<box>.*?(</box>|$)', '', text)


def is_video_file(filename):
    video_extensions = ['.mp4', '.avi', '.mkv', '.mov', '.wmv', '.flv', '.webm', '.mpeg']
    return any(filename.lower().endswith(ext) for ext in video_extensions)


def transform_messages(original_messages):
    transformed_messages = []
    for message in original_messages:
        new_content = []
        for item in message['content']:
            if 'image' in item:
                new_item = {'type': 'image', 'image': item['image']}
            elif 'text' in item:
                new_item = {'type': 'text', 'text': item['text']}
            elif 'video' in item:
                new_item = {'type': 'video', 'video': item['video']}
            else:
                continue
            new_content.append(new_item)
        new_message = {'role': message['role'], 'content': new_content}
        transformed_messages.append(new_message)
    return transformed_messages

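# For reference, transform_messages emits the content-typed structure consumed by
# processor.apply_chat_template and process_vision_info, e.g. (illustrative values):
#     [{'role': 'user',
#       'content': [{'type': 'image', 'image': 'file:///path/to/example.jpg'},
#                   {'type': 'text', 'text': 'Describe this image.'}]}]
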
def _launch_demo(args, model, processor):

    def call_local_model(model, processor, messages):
        messages = transform_messages(messages)
        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        image_inputs, video_inputs = process_vision_info(messages)
        # Send inputs to the model's device so --cpu-only works as well
        inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True,
                           return_tensors='pt').to(model.device)

        tokenizer = processor.tokenizer
        streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
        gen_kwargs = {'max_new_tokens': 512, 'streamer': streamer, **inputs}

        # Generate in a background thread; the streamer yields partial text as it arrives
        thread = Thread(target=model.generate, kwargs=gen_kwargs)
        thread.start()

        generated_text = ''
        for new_text in streamer:
            generated_text += new_text
            yield generated_text

    def create_predict_fn():

        def predict(_chatbot, task_history):
            nonlocal model, processor
            chat_query = _chatbot[-1][0]
            query = task_history[-1][0]
            if len(chat_query) == 0:
                _chatbot.pop()
                task_history.pop()
                return _chatbot
            print('User: ' + _parse_text(query))
            history_cp = copy.deepcopy(task_history)
            full_response = ''
            messages = []
            content = []
            # Uploaded files accumulate into `content`; a text query flushes the
            # pending user turn together with the recorded assistant reply.
            for q, a in history_cp:
                if isinstance(q, (tuple, list)):
                    if is_video_file(q[0]):
                        content.append({'video': f'file://{q[0]}'})
                    else:
                        content.append({'image': f'file://{q[0]}'})
                else:
                    content.append({'text': q})
                    messages.append({'role': 'user', 'content': content})
                    messages.append({'role': 'assistant', 'content': [{'text': a}]})
                    content = []
            # Drop the placeholder assistant reply for the in-flight turn
            messages.pop()

            for response in call_local_model(model, processor, messages):
                _chatbot[-1] = (_parse_text(chat_query), _remove_image_special(_parse_text(response)))
                yield _chatbot
                full_response = _parse_text(response)

            task_history[-1] = (query, full_response)
            print('Qwen-VL-Chat: ' + _parse_text(full_response))
            yield _chatbot

        return predict

    def create_regenerate_fn():

        def regenerate(_chatbot, task_history):
            nonlocal model, processor
            if not task_history:
                return _chatbot
            item = task_history[-1]
            if item[1] is None:
                return _chatbot
            task_history[-1] = (item[0], None)
            chatbot_item = _chatbot.pop(-1)
            if chatbot_item[0] is None:
                _chatbot[-1] = (_chatbot[-1][0], None)
            else:
                _chatbot.append((chatbot_item[0], None))
            _chatbot_gen = predict(_chatbot, task_history)
            for _chatbot in _chatbot_gen:
                yield _chatbot

        return regenerate

    predict = create_predict_fn()
    regenerate = create_regenerate_fn()

    def add_text(history, task_history, text):
        task_text = text
        history = history if history is not None else []
        task_history = task_history if task_history is not None else []
        history = history + [(_parse_text(text), None)]
        task_history = task_history + [(task_text, None)]
        return history, task_history, ''

    def add_file(history, task_history, file):
        history = history if history is not None else []
        task_history = task_history if task_history is not None else []
        history = history + [((file.name,), None)]
        task_history = task_history + [((file.name,), None)]
        return history, task_history

    def reset_user_input():
        return gr.update(value='')

    def reset_state(task_history):
        task_history.clear()
        return []

    with gr.Blocks() as demo:
        gr.Markdown("""\
<p align="center"><img src="https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2-VL/qwen2VL_logo.png" style="height: 80px"/><p>""")
        gr.Markdown("""<center><font size=8>Qwen2-VL</center>""")
        gr.Markdown("""\
<center><font size=3>This WebUI is based on Qwen2-VL, developed by Alibaba Cloud.</center>""")
        gr.Markdown("""<center><font size=3>本WebUI基于Qwen2-VL。</center>""")

        chatbot = gr.Chatbot(label='Qwen2-VL', elem_classes='control-height', height=500)
        query = gr.Textbox(lines=2, label='Input')
        task_history = gr.State([])

        with gr.Row():
            addfile_btn = gr.UploadButton('📁 Upload (上传文件)', file_types=['image', 'video'])
            submit_btn = gr.Button('🚀 Submit (发送)')
            regen_btn = gr.Button('🤔️ Regenerate (重试)')
            empty_bin = gr.Button('🧹 Clear History (清除历史)')

        submit_btn.click(add_text, [chatbot, task_history, query],
                         [chatbot, task_history]).then(predict, [chatbot, task_history], [chatbot],
                                                       show_progress=True)
        submit_btn.click(reset_user_input, [], [query])
        empty_bin.click(reset_state, [task_history], [chatbot], show_progress=True)
        regen_btn.click(regenerate, [chatbot, task_history], [chatbot], show_progress=True)
        addfile_btn.upload(add_file, [chatbot, task_history, addfile_btn], [chatbot, task_history], show_progress=True)

        gr.Markdown("""\
<font size=2>Note: This demo is governed by the original license of Qwen2-VL. \
We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content, \
including hate speech, violence, pornography, deception, etc. \
(注:本演示受Qwen2-VL的许可协议限制。我们强烈建议,用户不应传播及不应允许他人传播以下内容,\
包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息。)""")

    demo.queue().launch(
        share=args.share,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        server_name=args.server_name,
    )

def main():
    args = _get_args()
    model, processor = _load_model_processor(args)
    _launch_demo(args, model, processor)


if __name__ == '__main__':
    main()
# ./transformers.zip
qwen-vl-utils
#!/bin/bash
pip install modelscope
modelscope download --model qwen/Qwen2-VL-7B-Instruct --local_dir qwen/Qwen2-VL-7B-Instruct
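# Equivalent via the Python SDK (a sketch; the local_dir parameter is assumed to be
# supported by the installed modelscope version):
# python -c "from modelscope import snapshot_download; snapshot_download('qwen/Qwen2-VL-7B-Instruct', local_dir='qwen/Qwen2-VL-7B-Instruct')"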
#!/bin/bash
cd /root/Qwen2-7B-VL-demo
python app.py
{
"cells": [
{
"cell_type": "markdown",
"id": "e5c5a211-2ccd-4341-af10-ac546484b91f",
"metadata": {
"tags": []
},
"source": [
"## 项目介绍\n",
"- 原项目地址:https://www.modelscope.cn/studios/qwen/Qwen2-7B-VL-demo\n",
"- Qwen2-7B-VL:通义千问推出的聊天机器人模型,具有增强的图像理解能力、高级视频理解能力、集成的可视化agent功能、扩展的多语言支持等特点。\n",
"## 使用说明\n",
"- 启动和重启 Notebook 点上方工具栏中的「重启并运行所有单元格」。出现如下内容就算成功了:\n",
" - `Running on local URL: http://0.0.0.0:7860`\n",
" - `Running on public URL: https://xxxxxxxxxxxxxxx.gradio.live`\n",
"- 通过以下方式开启页面:\n",
" - 控制台打开「自定义服务」了,访问自定义服务端口号设置为7860\n",
" - 直接打开显示的公开链接`public URL`\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53a96614-e2d2-4710-a82b-0d5ca9cb9872",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 启动\n",
"!sh start.sh"
]
},
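{
"cell_type": "markdown",
"metadata": {},
"source": [
"If port 7860 is already taken, the demo can be launched with explicit flags instead (a sketch using the arguments defined in app.py):\n",
"```bash\n",
"cd /root/Qwen2-7B-VL-demo && python app.py --server-port 7861\n",
"```\n"
],
"id": "a1b2c3d4e5f60718"
},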
{
"cell_type": "markdown",
"source": [
"---\n",
"**扫码关注公众号,获取更多资讯**<br>\n",
"<div align=center>\n",
"<img src=\"assets/二维码.jpeg\" width = 20% />\n",
"</div>\n"
],
"metadata": {
"collapsed": false
},
"id": "2f54158c2967bc25"
},
{
"cell_type": "code",
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
},
"id": "6dc59fbbcf222b6b"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}