Commit 89e36472 authored by laibao

Update inference

parent 214595f4
@@ -43,7 +43,8 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dt
# <Image ID>: replace with the ID of the Docker image pulled above
# <Host Path>: host-side path
# <Container Path>: path mapped inside the container
docker run -it --name qwen1.5_vllm --privileged --shm-size=64G --device=/dev/kfd --device=/dev/dri/ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --ulimit memlock=-1:-1 --ipc=host --network host --group-add video -v /opt/hyhal:/opt/hyhal -v <Host Path>:<Container Path> <Image ID> /bin/bash
docker run -it --name llava_vllm --privileged --shm-size=64G --device=/dev/kfd --device=/dev/dri/ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --ulimit memlock=-1:-1 --ipc=host --network host --group-add video -v /opt/hyhal:/opt/hyhal -v <Host Path>:<Container Path> <Image ID> /bin/bash
```
`Tips: On K100/Z100L, use the custom image instead: docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:vllm0.5.0-dtk24.04.1-ubuntu20.04-py310-zk-v1. K100/Z100L does not support AWQ quantization.`
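For K100/Z100L, the pull command from the tip above can be run directly (the image tag is copied verbatim from the tip):
```bash
docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:vllm0.5.0-dtk24.04.1-ubuntu20.04-py310-zk-v1
```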
@@ -53,14 +54,15 @@ docker run -it --name qwen1.5_vllm --privileged --shm-size=64G --device=/dev/kf
```
# <Host Path>: host-side path
# <Container Path>: path mapped inside the container
docker build -t qwen1.5:latest .
docker run -it --name qwen1.5_vllm --privileged --shm-size=64G --device=/dev/kfd --device=/dev/dri/ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --ulimit memlock=-1:-1 --ipc=host --network host --group-add video -v /opt/hyhal:/opt/hyhal:ro -v <Host Path>:<Container Path> qwen1.5:latest /bin/bash
docker build -t llava:latest .
docker run -it --name llava_vllm --privileged --shm-size=64G --device=/dev/kfd --device=/dev/dri/ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --ulimit memlock=-1:-1 --ipc=host --network host --group-add video -v /opt/hyhal:/opt/hyhal:ro -v <Host Path>:<Container Path> llava:latest /bin/bash
```
### Anaconda (Method 3)
```
conda create -n qwen1.5_vllm python=3.10
conda create -n llava_vllm python=3.10
```
The specialized deep learning libraries required by this project for DCU GPUs can be downloaded and installed from the [光合](https://developer.hpccube.com/tool/) developer community.
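A minimal sketch of finishing the environment setup, assuming the DCU (DTK) builds of torch and vllm have already been downloaded from the developer community (the wheel file names below are placeholders, not actual release names):
```bash
# Activate the environment created above
conda activate llava_vllm

# Install the DCU (DTK) wheels downloaded from the developer community;
# replace the placeholders with the actual file names you downloaded.
pip install ./torch-<version>+<dtk version>-cp310-cp310-linux_x86_64.whl
pip install ./vllm-<version>+<dtk version>-cp310-cp310-linux_x86_64.whl
```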
@@ -84,20 +86,94 @@ conda create -n qwen1.5_vllm python=3.10
### Model Download
| Base model | Chat model | GPTQ model | AWQ model |
| -------------------------------------------------------------- | --------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------- |
| [Qwen-7B](https://huggingface.co/Qwen/Qwen-7B) | [Qwen-7B-Chat](http://113.200.138.88:18080/aimodels/Qwen-7B-Chat) | [Qwen-7B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen-7B-Chat-Int4) | |
| [Qwen-14B](https://huggingface.co/Qwen/Qwen-14B) | [Qwen-14B-Chat](http://113.200.138.88:18080/aimodels/Qwen-14B-Chat) | [Qwen-14B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen-14B-Chat-Int4) | |
| [Qwen-72B](http://113.200.138.88:18080/aimodels/qwen/Qwen-72B) | [Qwen-72B-Chat](http://113.200.138.88:18080/aimodels/Qwen-72B-Chat) | [Qwen-72B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen-72B-Chat-Int4) | |
| [Qwen1.5-7B](https://huggingface.co/Qwen/Qwen1.5-7B) | [Qwen1.5-7B-Chat](https://huggingface.co/Qwen/Qwen1.5-7B-Chat) | [Qwen1.5-7B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4) | [Qwen1.5-7B-Chat-AWQ-Int4](http://113.200.138.88:18080/aimodels/qwen/Qwen1.5-7B-Chat-AWQ) |
| [Qwen1.5-14B](https://huggingface.co/Qwen/Qwen1.5-14B) | [Qwen1.5-14B-Chat](http://113.200.138.88:18080/aimodels/qwen/Qwen1.5-14B-Chat) | [Qwen1.5-14B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-GPTQ-Int4) | [Qwen1.5-14B-Chat-AWQ-Int4](http://113.200.138.88:18080/aimodels/qwen/Qwen1.5-14B-Chat-AWQ) |
| [Qwen1.5-32B](http://113.200.138.88:18080/aimodels/Qwen1.5-32B) | [Qwen1.5-32B-Chat](http://113.200.138.88:18080/aimodels/Qwen1.5-32B-Chat) | [Qwen1.5-32B-Chat-GPTQ-Int4](http://113.200.138.88:18080/aimodels/Qwen1.5-32B-Chat-GPTQ-Int4) | [Qwen1.5-32B-Chat-AWQ-Int4](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-AWQ) |
| [Qwen1.5-72B](http://113.200.138.88:18080/aimodels/Qwen1.5-72B) | [Qwen1.5-72B-Chat](http://113.200.138.88:18080/aimodels/Qwen1.5-72B-Chat) | [Qwen1.5-72B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-GPTQ-Int4) | [Qwen1.5-72B-Chat-AWQ-Int4](http://113.200.138.88:18080/aimodels/qwen/Qwen1.5-72B-Chat-AWQ) |
| [Qwen1.5-110B](http://113.200.138.88:18080/aimodels/Qwen1.5-110B) | [Qwen1.5-110B-Chat](http://113.200.138.88:18080/aimodels/Qwen1.5-110B-Chat) | [Qwen1.5-110B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-GPTQ-Int4) | [Qwen1.5-110B-Chat-AWQ-Int4](http://113.200.138.88:18080/aimodels/qwen/Qwen1.5-110B-Chat-AWQ) |
| [Qwen2-7B](http://113.200.138.88:18080/aimodels/Qwen2-7B) | [Qwen2-7B-Instruct](http://113.200.138.88:18080/aimodels/Qwen2-7B-Instruct) | [Qwen2-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-7B-Instruct-GPTQ-Int4) | [Qwen2-7B-Instruct-AWQ-Int4](http://113.200.138.88:18080/aimodels/qwen/Qwen2-7B-Instruct-AWQ) |
| [Qwen2-72B](http://113.200.138.88:18080/aimodels/Qwen2-72B) | [Qwen2-72B-Instruct](http://113.200.138.88:18080/aimodels/Qwen2-72B-Instruct) | [Qwen2-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-72B-Instruct-GPTQ-Int4) | [Qwen2-72B-Instruct-AWQ-Int4](http://113.200.138.88:18080/aimodels/qwen/Qwen2-72B-Instruct-AWQ) |
### Offline Batch Inference
| Base model | | |
| ---------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------------------- |
| [llava-v1.5-7b](http://113.200.138.88:18080/aimodels/llava-v1.5-7b) | [llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf) | [llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf) |
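One way to fetch the Hugging Face checkpoints linked above is a git-lfs clone; a minimal sketch (models hosted on the internal mirror are downloaded from their linked pages instead):
```bash
# Requires git-lfs to pull the weight files
git lfs install
git clone https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf
```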
### Inference
```python
import argparse

import torch
from PIL import Image

from vllm import LLM, SamplingParams
from vllm.multimodal.image import ImageFeatureData, ImagePixelData


def run_llava_pixel_values(*, disable_image_processor: bool = False):
    # Feed the image as raw pixel values; vLLM's image processor converts it
    # to the shape expected by the vision tower unless it is disabled.
    llm = LLM(
        model="llava/llava-1.5-7b-hf",  # model path (local directory or repo id)
        image_input_type="pixel_values",
        image_token_id=32000,
        image_input_shape="1,3,336,336",
        image_feature_size=576,
        disable_image_processor=disable_image_processor,
    )

    # One <image> placeholder token per image feature (576 for LLaVA-1.5).
    prompt = "<image>" * 576 + (
        "\nUSER: What is the content of this image?\nASSISTANT:")

    if disable_image_processor:
        # Pre-processed pixel values saved as a tensor.
        image = torch.load("images/stop_sign_pixel_values.pt")
    else:
        image = Image.open("/images/stop_sign.jpg")  # image location

    outputs = llm.generate({
        "prompt": prompt,
        "multi_modal_data": ImagePixelData(image),
    })

    for o in outputs:
        generated_text = o.outputs[0].text
        print(generated_text)


def run_llava_image_features():
    # Feed pre-computed image features (vision-tower output) directly,
    # skipping the image encoder.
    llm = LLM(
        model="llava/llava-1.5-7b-hf",
        image_input_type="image_features",
        image_token_id=32000,
        image_input_shape="1,576,1024",
        image_feature_size=576,
    )

    prompt = "<image>" * 576 + (
        "\nUSER: What is the content of this image?\nASSISTANT:")

    image: torch.Tensor = torch.load("images/stop_sign_image_features.pt")

    sampling_params = SamplingParams(temperature=0, max_tokens=64)

    outputs = llm.generate({
        "prompt": prompt,
        "multi_modal_data": ImageFeatureData(image),
    }, sampling_params=sampling_params)

    for o in outputs:
        generated_text = o.outputs[0].text
        print(generated_text)


def main(args):
    if args.type == "pixel_values":
        run_llava_pixel_values()
    else:
        run_llava_image_features()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Demo on Llava")
    parser.add_argument("--type",
                        type=str,
                        choices=["pixel_values", "image_features"],
                        default="pixel_values",
                        help="image input type")
    args = parser.parse_args()
    main(args)
```
```bash
python examples/offline_inference.py
```
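The script accepts a `--type` flag (see the argparse setup above) to switch between the two input paths, for example:
```bash
# Feed pre-computed image features instead of raw pixel values
python examples/offline_inference.py --type image_features
```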