Initial commit

f19343b0 · wanglch · f19343b0 · f19343b0 · f19343b0 · f19343b0
Commit f19343b0 authored Feb 20, 2025 by wanglch
20 changed files
--- a/cookbooks/video_understanding.ipynb
+++ b/cookbooks/video_understanding.ipynb
--- a/docker/Dockerfile-2.5-cu121
+++ b/docker/Dockerfile-2.5-cu121
+# Dockerfile of qwenllm/qwenvl:2.5-cu121
+#
+# Multi-stage build; each stage extends the previous one:
+#   base        - OS packages, CMake 3.26.1, `python` symlink, git-lfs
+#   dev         - working directory /data/shared/Qwen
+#   bundle_req  - PyTorch, transformers (pinned commit), accelerate, qwen-vl-utils
+#   bundle_vllm - optional flash-attn and vLLM builds, gradio demo dependencies
+# The RUN heredoc and the `RUN --mount=type=cache` instructions below are
+# BuildKit features.
+
+# CUDA version and base image; both can be overridden with --build-arg.
+ARG CUDA_VERSION=12.1.0
+ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04
+
+FROM ${from} as base
+
+# Keep apt from prompting during the build.
+ARG DEBIAN_FRONTEND=noninteractive
+# Install system packages, then drop the apt package lists in the same layer.
+RUN <<EOF
+apt update -y && apt upgrade -y && apt install -y --no-install-recommends  \
+    git \
+    git-lfs \
+    python3 \
+    python3-pip \
+    python3-dev \
+    wget \
+    vim \
+    libsndfile1 \
+    ccache \
+    software-properties-common \
+&& rm -rf /var/lib/apt/lists/*
+EOF
+
+# Install CMake 3.26.1 from the upstream installer into /opt and link its
+# binaries into /usr/local/bin; the installer script is removed afterwards.
+RUN wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-Linux-x86_64.sh \
+    -q -O /tmp/cmake-install.sh \
+    && chmod u+x /tmp/cmake-install.sh \
+    && mkdir /opt/cmake-3.26.1 \
+    && /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-3.26.1 \
+    && rm /tmp/cmake-install.sh \
+    && ln -s /opt/cmake-3.26.1/bin/* /usr/local/bin
+
+# Make `python` resolve to python3.
+RUN ln -s /usr/bin/python3 /usr/bin/python
+
+RUN git lfs install
+
+FROM base as dev
+
+WORKDIR /
+
+RUN mkdir -p /data/shared/Qwen
+
+# All following relative paths (COPY destinations, `cd ./...`) resolve here.
+WORKDIR /data/shared/Qwen/
+
+FROM dev as bundle_req
+# Python dependencies. pip's download cache is kept in a BuildKit cache mount.
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install networkx==3.1
+# PyTorch stack taken from the cu121 wheel index.
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 xformers==0.0.28.post3 --index-url https://download.pytorch.org/whl/cu121
+# transformers is installed from a pinned git commit rather than a release.
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install git+https://github.com/huggingface/transformers@f3f6c86582611976e72be054675e2bf0abb5f775  \
+    && pip3 install accelerate
+
+# Install qwen-vl-utils from the copied source tree.
+# NOTE(review): the ../ source path presumably expects the build context to be
+# the repository root - confirm against the build command.
+COPY ../qwen-vl-utils ./qwen-vl-utils
+RUN cd ./qwen-vl-utils \
+    && pip3 install .
+
+FROM bundle_req as bundle_vllm
+
+# Set to anything other than "true" to skip the flash-attn install below.
+ARG BUNDLE_FLASH_ATTENTION=true
+
+# Build-time environment for the installs below.
+# NOTE(review): presumably consumed by the flash-attn / vLLM source builds
+# (parallelism, target GPU architectures) - confirm against their build docs.
+ENV MAX_JOBS=8
+ENV NVCC_THREADS=1
+ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+ENV VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
+# Same path as the ccache cache mounts used by the RUN instructions below.
+ENV CCACHE_DIR=/root/.cache/ccache
+
+# Optional: install flash-attn without pip build isolation.
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/root/.cache/pip \
+    if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then \
+        pip3 install --no-build-isolation flash-attn==2.7.2.post1; \
+    fi
+
+# Set to anything other than "true" to skip the vLLM build below.
+ARG BUNDLE_VLLM=true
+
+# Optional: clone vLLM, check out a pinned commit, install it from source, and
+# delete the checkout afterwards.
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/root/.cache/pip \
+    if [ "$BUNDLE_VLLM" = "true" ]; then \
+    mkdir -p /data/shared/code \
+        && cd /data/shared/code \
+        && git clone https://github.com/vllm-project/vllm.git \
+        && cd vllm \
+        && git checkout bf3b79efb82676219a3275764d8fcf4c70097ce5 \
+        && pip3 install -r requirements-cuda.txt \
+        && pip3 install setuptools-scm \
+        && pip3 install . \
+        && cd /data/shared/Qwen \
+        && rm -rf /data/shared/code/vllm; \
+    fi
+
+# Packages used by the web demo.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip3 install \
+    gradio==5.4.0 \
+    gradio_client==1.4.2 \
+    transformers-stream-generator==0.0.4 \
+    av
+
+# Remove pip's cache directory from the image filesystem.
+# NOTE(review): most installs above use a cache mount at this path, and files
+# written by earlier layers stay in those layers - confirm this step is needed.
+RUN rm -rvf /root/.cache/pip
+
+# Demo sources, copied into /data/shared/Qwen (the current WORKDIR).
+COPY ../web_demo_mm.py ./
+COPY ../web_demo_streaming ./web_demo_streaming
+
+# Documents port 80 as the container's listening port.
+EXPOSE 80
--- a/docker/Dockerfile-cu121
+++ b/docker/Dockerfile-cu121
+# Dockerfile of qwenllm/qwenvl:2-cu121
+#
+# Multi-stage build; each stage extends the previous one:
+#   base        - OS packages, CMake 3.26.1, `python` symlink, git-lfs
+#   dev         - working directory /data/shared/Qwen
+#   bundle_req  - PyTorch, transformers (pinned commit), accelerate, qwen-vl-utils
+#   bundle_vllm - flash-attn, vLLM built from a fork branch, gradio demo dependencies
+# The RUN heredoc below is a BuildKit feature.
+
+# CUDA version and base image; both can be overridden with --build-arg.
+ARG CUDA_VERSION=12.1.0
+ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04
+
+FROM ${from} as base
+
+# Keep apt from prompting during the build.
+ARG DEBIAN_FRONTEND=noninteractive
+# Install system packages, then drop the apt package lists in the same layer.
+RUN <<EOF
+apt update -y && apt upgrade -y && apt install -y --no-install-recommends  \
+    git \
+    git-lfs \
+    python3 \
+    python3-pip \
+    python3-dev \
+    wget \
+    vim \
+    libsndfile1 \
+&& rm -rf /var/lib/apt/lists/*
+EOF
+
+# Install CMake 3.26.1 from the upstream installer into /opt and link its
+# binaries into /usr/local/bin; the installer script is removed afterwards.
+RUN wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-Linux-x86_64.sh \
+    -q -O /tmp/cmake-install.sh \
+    && chmod u+x /tmp/cmake-install.sh \
+    && mkdir /opt/cmake-3.26.1 \
+    && /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-3.26.1 \
+    && rm /tmp/cmake-install.sh \
+    && ln -s /opt/cmake-3.26.1/bin/* /usr/local/bin
+
+# Make `python` resolve to python3.
+RUN ln -s /usr/bin/python3 /usr/bin/python
+
+RUN git lfs install
+
+FROM base as dev
+
+WORKDIR /
+
+RUN mkdir -p /data/shared/Qwen
+
+# All following relative paths (COPY destinations) resolve here.
+WORKDIR /data/shared/Qwen/
+
+FROM dev as bundle_req
+# Python dependencies; --no-cache-dir keeps pip's cache out of the layers.
+RUN pip3 install --no-cache-dir networkx==3.1
+# PyTorch stack taken from the cu121 wheel index.
+RUN pip3 install --no-cache-dir torch==2.4.0 torchvision==0.19 torchaudio==2.4.0 xformers==0.0.27.post2 --index-url https://download.pytorch.org/whl/cu121
+
+# transformers is installed from a pinned git commit rather than a release.
+RUN pip3 install --no-cache-dir git+https://github.com/huggingface/transformers@21fac7abba2a37fae86106f87fcf9974fd1e3830  \
+    && pip3 install --no-cache-dir accelerate \
+    && pip3 install --no-cache-dir qwen-vl-utils
+
+FROM bundle_req as bundle_vllm
+
+# flash-attn is installed without pip build isolation.
+RUN pip3 install --no-cache-dir --no-build-isolation flash-attn==2.6.1
+
+# Clone the fyabc/vllm fork, check out the add_qwen2_vl_new branch, install it
+# from source, and delete the checkout afterwards.
+# NOTE(review): this tracks a branch name, not a commit hash, so rebuilds may
+# pick up different code - confirm whether pinning is wanted.
+RUN mkdir -p /data/shared/code \
+    && cd /data/shared/code \
+    && git clone https://github.com/fyabc/vllm.git \
+    && cd vllm \
+    && git checkout add_qwen2_vl_new \
+    && pip3 install --no-cache-dir -r requirements-cuda.txt \
+    && pip3 install --no-cache-dir --no-build-isolation . \
+    && cd /data/shared/Qwen \
+    && rm -rf /data/shared/code/vllm
+
+# Packages used by the web demo.
+RUN pip3 install --no-cache-dir \
+    gradio==4.42.0 \
+    gradio_client==1.3.0 \
+    transformers-stream-generator==0.0.4
+
+# Demo sources, copied into /data/shared/Qwen (the current WORKDIR).
+# NOTE(review): the ../ source paths presumably expect the build context to be
+# the repository root - confirm against the build command.
+COPY ../utils.py ./
+COPY ../web_demo_mm.py ./
+
+# Documents port 80 as the container's listening port.
+EXPOSE 80
--- a/docker/docker_web_demo.sh
+++ b/docker/docker_web_demo.sh
+#!/usr/bin/env bash
+#
+# Pull the Qwen2.5-VL docker image from DockerHub and start a detached
+# container that runs the web demo (web_demo_mm.py).
+#
+# The checkpoint directory on the host is bind-mounted into the container and
+# the demo's port 80 is published on the chosen host port.
+
+# Defaults; each can be overridden by the matching command-line option.
+IMAGE_NAME=qwenllm/qwenvl:2.5-cu121
+QWEN_CHECKPOINT_PATH=/path/to/Qwen2.5-VL-7B-Instruct
+PORT=8901
+CONTAINER_NAME=qwen2.5-vl
+
+# Print the command-line synopsis to stdout.
+function usage() {
+    echo '
+Usage: bash docker/docker_web_demo.sh [-i IMAGE_NAME] [-c /path/to/Qwen-Instruct] [-n CONTAINER_NAME] [--port PORT]
+'
+}
+
+# Abort unless the option in $1 is followed by a non-empty value in $2.
+# Without this, an option given as the last argument would silently set its
+# variable to the empty string.
+function require_value() {
+    if [[ $# -lt 2 || -z "$2" ]]; then
+        echo "Option ${1} requires a value." >&2
+        usage >&2
+        exit 1
+    fi
+}
+
+# Loop on the argument count rather than on "$1" being non-empty, so an empty
+# argument is reported as unknown instead of ending the parsing early.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -i | --image-name )
+            require_value "$@"
+            shift
+            IMAGE_NAME=$1
+            ;;
+        -c | --checkpoint )
+            require_value "$@"
+            shift
+            QWEN_CHECKPOINT_PATH=$1
+            ;;
+        -n | --container-name )
+            require_value "$@"
+            shift
+            CONTAINER_NAME=$1
+            ;;
+        --port )
+            require_value "$@"
+            shift
+            PORT=$1
+            ;;
+        -h | --help )
+            usage
+            exit 0
+            ;;
+        * )
+            echo "Unknown argument ${1}" >&2
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+if [[ ! -e "${QWEN_CHECKPOINT_PATH}/config.json" ]]; then
+    echo "Checkpoint config.json file not found in ${QWEN_CHECKPOINT_PATH}, exit." >&2
+    exit 1
+fi
+
+# `docker run --mount type=bind` needs an absolute source path, so resolve a
+# relative checkpoint path before using it.
+checkpoint_dir=$(CDPATH='' cd -- "${QWEN_CHECKPOINT_PATH}" && pwd) || {
+    echo "Cannot resolve checkpoint directory ${QWEN_CHECKPOINT_PATH}, exit." >&2
+    exit 1
+}
+
+sudo docker pull "${IMAGE_NAME}" || {
+    echo "Pulling image ${IMAGE_NAME} failed, exit." >&2
+    exit 1
+}
+
+# NOTE(review): mounting /var/run/docker.sock gives the container control over
+# the host Docker daemon; confirm the demo needs it before keeping this mount.
+sudo docker run --gpus all -d --restart always --name "${CONTAINER_NAME}" \
+    -v /var/run/docker.sock:/var/run/docker.sock -p "${PORT}:80" \
+    --mount "type=bind,source=${checkpoint_dir},target=/data/shared/Qwen/Qwen2.5-VL-Instruct" \
+    -it "${IMAGE_NAME}" \
+    python web_demo_mm.py --server-port 80 --server-name 0.0.0.0 -c /data/shared/Qwen/Qwen2.5-VL-Instruct/ || {
+    echo "Starting container ${CONTAINER_NAME} failed, exit." >&2
+    exit 1
+}
+
+echo "Successfully started web demo. Open 'http://localhost:${PORT}' to try!
+Run \`docker logs ${CONTAINER_NAME}\` to check demo status.
+Run \`docker rm -f ${CONTAINER_NAME}\` to stop and remove the demo."
\ No newline at end of file
--- a/flash_attn-2.6.1+das.opt2.dtk24043-cp310-cp310-manylinux_2_28_x86_64.whl
+++ b/flash_attn-2.6.1+das.opt2.dtk24043-cp310-cp310-manylinux_2_28_x86_64.whl
--- a/icon.png
+++ b/icon.png
--- a/images/arch.png
+++ b/images/arch.png
--- a/images/result.png
+++ b/images/result.png
--- a/images/theory.png
+++ b/images/theory.png
--- a/inference.py
+++ b/inference.py
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+
+# default: Load the model on the available device(s)
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    "/home/wanglch/Qwen2.5-VL/Qwen2.5-VL-7B-Instruct/", torch_dtype="auto", device_map="auto"
+)
+
+# We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
+# model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+#     "Qwen/Qwen2.5-VL-7B-Instruct",
+#     torch_dtype=torch.bfloat16,
+#     attn_implementation="flash_attention_2",
+#     device_map="auto",
+# )
+
+# default processor
+processor = AutoProcessor.from_pretrained("/home/wanglch/Qwen2.5-VL/Qwen2.5-VL-7B-Instruct/")
+
+# The default range for the number of visual tokens per image in the model is 4-16384.
+# You can set min_pixels and max_pixels according to your needs, such as a token range of 256-1280, to balance performance and cost.
+# min_pixels = 256*28*28
+# max_pixels = 1280*28*28
+# processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
+
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "image": "/home/wanglch/Images/1.jpg",
+            },
+            {"type": "text", "text":"ocr this image."},
+        ],
+    }
+]
+
+# Preparation for inference
+text = processor.apply_chat_template(
+    messages, tokenize=False, add_generation_prompt=True
+)
+image_inputs, video_inputs = process_vision_info(messages)
+inputs = processor(
+    text=[text],
+    images=image_inputs,
+    videos=video_inputs,
+    padding=True,
+    return_tensors="pt",
+)
+inputs = inputs.to(model.device)
+
+# Inference: Generation of the output
+generated_ids = model.generate(**inputs, max_new_tokens=128)
+generated_ids_trimmed = [
+    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+]
+output_text = processor.batch_decode(
+    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+)
+print(output_text)
--- a/mllm_demo.json
+++ b/mllm_demo.json
+[
+  {
+    "messages": [
+      {
+        "content": "<image>Who are they?",
+        "role": "user"
+      },
+      {
+        "content": "They're Kane and Goretzka from Bayern Munich.",
+        "role": "assistant"
+      },
+      {
+        "content": "What are they doing?",
+        "role": "user"
+      },
+      {
+        "content": "They are celebrating on the soccer field.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/1.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "<image>Who is he?",
+        "role": "user"
+      },
+      {
+        "content": "He's Thomas Muller from Bayern Munich.",
+        "role": "assistant"
+      },
+      {
+        "content": "Why is he on the ground?",
+        "role": "user"
+      },
+      {
+        "content": "Because he's sliding on his knees to celebrate.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/2.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "<image>Please describe this image",
+        "role": "user"
+      },
+      {
+        "content": "Chinese astronaut Gui Haichao is giving a speech.",
+        "role": "assistant"
+      },
+      {
+        "content": "What has he accomplished?",
+        "role": "user"
+      },
+      {
+        "content": "He was appointed to be a payload specialist on Shenzhou 16 mission in June 2022, thus becoming the first Chinese civilian of Group 3 in space on 30 May 2023. He is responsible for the on-orbit operation of space science experimental payloads.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/3.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "<image>他们是谁？",
+        "role": "user"
+      },
+      {
+        "content": "他们是拜仁慕尼黑的凯恩和格雷茨卡。",
+        "role": "assistant"
+      },
+      {
+        "content": "他们在做什么？",
+        "role": "user"
+      },
+      {
+        "content": "他们在足球场上庆祝。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/1.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "<image>他是谁？",
+        "role": "user"
+      },
+      {
+        "content": "他是来自拜仁慕尼黑的托马斯·穆勒。",
+        "role": "assistant"
+      },
+      {
+        "content": "他为什么在地上？",
+        "role": "user"
+      },
+      {
+        "content": "因为他正在双膝跪地滑行庆祝。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/2.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "<image>请描述这张图片",
+        "role": "user"
+      },
+      {
+        "content": "中国宇航员桂海潮正在讲话。",
+        "role": "assistant"
+      },
+      {
+        "content": "他取得过哪些成就？",
+        "role": "user"
+      },
+      {
+        "content": "他于2022年6月被任命为神舟十六号任务的有效载荷专家，从而成为2023年5月30日进入太空的首位平民宇航员。他负责在轨操作空间科学实验有效载荷。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/3.jpg"
+    ]
+  }
+]
--- a/mllm_demo_data/1.jpg
+++ b/mllm_demo_data/1.jpg
--- a/mllm_demo_data/1.mp3
+++ b/mllm_demo_data/1.mp3
--- a/mllm_demo_data/1.mp4
+++ b/mllm_demo_data/1.mp4
--- a/mllm_demo_data/2.avi
+++ b/mllm_demo_data/2.avi
--- a/mllm_demo_data/2.jpg
+++ b/mllm_demo_data/2.jpg
--- a/mllm_demo_data/2.wav
+++ b/mllm_demo_data/2.wav
--- a/mllm_demo_data/3.flac
+++ b/mllm_demo_data/3.flac
--- a/mllm_demo_data/3.jpg
+++ b/mllm_demo_data/3.jpg
--- a/mllm_demo_data/3.mp4
+++ b/mllm_demo_data/3.mp4