Commit 4439b087 authored by chenzk's avatar chenzk
Browse files

v1.0.3

parent c487e25a
......@@ -30,12 +30,11 @@ mv vita_pytorch VITA # 去框架名后缀
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.3.0-ubuntu22.04-dtk24.04.2-py3.10
# 请将<your IMAGE ID>替换为以上拉取的docker镜像的ID,本镜像为:83714c19d308
docker run -it --shm-size=64G -v $PWD/VITA:/home/VITA-v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name vita <your IMAGE ID> bash
docker run -it --shm-size=64G -v $PWD/VITA:/home/VITA -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name vita <your IMAGE ID> bash
cd /home/VITA
pip install -r requirements.txt # requirements.txt
# 安装ffmpeg
apt update
apt-get install ffmpeg
# 安装torchaudio读取音频所需的ffmpeg-4.4.4
sh ffmpeg_env.sh
# 安装gradio
pip install gradio==5.4.0 # gradio
cp -r frpc_linux_amd64 /usr/local/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.3
......@@ -48,9 +47,8 @@ docker build --no-cache -t vita:latest .
docker run --shm-size=64G --name vita -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video -v $PWD/../../VITA:/home/VITA -it vita bash
# 若遇到Dockerfile启动的方式安装环境需要长时间等待,可注释掉里面的pip安装,启动容器后再安装python库:pip install -r requirements.txt。
cd /home/VITA
# 安装ffmpeg
apt update
apt-get install ffmpeg
# 安装torchaudio读取音频所需的ffmpeg-4.4.4
sh ffmpeg_env.sh
# 安装gradio
pip install gradio==5.4.0 # gradio
cp -r frpc_linux_amd64 /usr/local/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.3
......@@ -78,9 +76,8 @@ xformers:0.0.25
```
cd VITA
pip install -r requirements.txt # requirements.txt
# 安装ffmpeg
apt update
apt-get install ffmpeg
# 安装torchaudio读取音频所需的ffmpeg-4.4.4
sh ffmpeg_env.sh
# 安装gradio
pip install gradio==5.4.0 # gradio
cp -r frpc_linux_amd64 /usr/local/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.3
......@@ -169,28 +166,7 @@ mv custom/* ShareGPT4V/
## 推理
```
# 方法一:pytorch
# Text query
HIP_VISIBLE_DEVICES=0,1 python video_audio_demo.py --model_path VITA/VITA_ckpt --image_path asset/vita_log2.png --model_type mixtral-8x7b --conv_mode mixtral_two --question "请描述这张图片。"
# infer.sh 中的audio推理功能敬请期待后续开放,torchaudio库适配.wav等音频文件的功能正在DCU上适配中,待适配后可从光合开发者社区下载。
# 方法二:vllm(优化中)
# 先准备vllm推理环境
apt update
apt-get install portaudio19-dev
apt-get install libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0
pip install -r web_demo/web_demo_requirements.txt
pip install whl/vllm-0.6.2+das.opt1.85def94.dtk24042-cp310-cp310-linux_x86_64.whl
pip install whl/flash_attn-2.6.1+das.opt2.08f8827.dtk24042-cp310-cp310-linux_x86_64.whl
cp -r VITA/VITA_ckpt/ demo_VITA_ckpt/
cd ./web_demo/vllm_tools
cp -rf model_weight_file/* ../../demo_VITA_ckpt/
cp -rf vllm_file/* /usr/local/lib/python3.10/site-packages/vllm/model_executor/models/ # 通过pip show vllm可以查看vllm安装位置
cp -rf multimodal/* /usr/local/lib/python3.10/site-packages/vllm/multimodal/
# 推理
python -m web_demo.web_ability_demo demo_VITA_ckpt/
sh infer.sh
```
更多资料可参考源项目的[`README_origin`](./README_origin.md)
......
# Build FFmpeg 4.4.4 from source (shared libraries, libmp3lame enabled) and
# install it with `make install`.
#
# Meant to be executed, e.g. `sh ffmpeg_env.sh`, with enough privileges for
# apt-get and `make install`. Do not source it: `set -e` would then apply to
# the calling shell.
set -e  # abort on the first failing step so later steps never run in the wrong directory

# Refresh package lists, upgrade installed packages, then install the build
# dependencies.
apt-get update
apt-get upgrade -y
apt-get install -y libmad0 libmad0-dev libid3tag0 libid3tag0-dev libmp3lame-dev libflac-dev libvorbis-dev yasm nasm

# NOTE(review): --no-check-certificate turns off TLS verification for a tarball
# that is compiled and installed below. Consider installing ca-certificates and
# dropping the flag.
wget --no-check-certificate https://www.ffmpeg.org/releases/ffmpeg-4.4.4.tar.gz
tar xzf ffmpeg-4.4.4.tar.gz

# Build inside a subshell so the script's own working directory never changes.
(
  cd ffmpeg-4.4.4
  ./configure --enable-shared --enable-libmp3lame
  # Use one make job per available CPU instead of a fixed count.
  make -j"$(nproc)"
  make install
)

# Register the new shared libraries with the dynamic linker cache. When this
# script is executed in a child shell, the export below cannot reach the
# caller, so the cache is what makes the libraries discoverable afterwards.
ldconfig /usr/local/lib

# Still exported for anything started from this same shell. The ${VAR:+...}
# form avoids a trailing ':' when LD_LIBRARY_PATH was unset or empty, which the
# loader would otherwise read as "current directory".
export LD_LIBRARY_PATH="/usr/local/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Remove the source tree and the downloaded archive.
rm -rf ffmpeg-4.4.4 ffmpeg-4.4.4.tar.gz
# Run video_audio_demo.py with the arguments shared by every query below.
# Any extra arguments are appended after the shared ones.
run_vita_demo() {
  HIP_VISIBLE_DEVICES=0,1 python video_audio_demo.py --model_path VITA/VITA_ckpt --image_path asset/vita_log2.png --model_type mixtral-8x7b --conv_mode mixtral_two "$@"
}

# Text query: the question is passed as text.
run_vita_demo --question "请描述这张图片。"

# Audio query: the question is passed as an audio file.
run_vita_demo --audio_path asset/q1.wav

# Noisy audio query.
run_vita_demo --audio_path asset/q2.wav
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment