Commit dc061ea0 authored by chenych

Delete vllm

parent 7b025bd5
@@ -27,6 +27,7 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:vllm0.8.5-ubuntu22.04
docker run -it --shm-size 200g --network=host --name {docker_name} --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro {imageID} bash
cd /your_code_path/mistral-small_pytorch
pip install "mistral_common>=1.5.4"
```
### Dockerfile (Method 2)
@@ -36,6 +37,7 @@ docker build --no-cache -t mistral-small:latest .
docker run -it --shm-size 200g --network=host --name {docker_name} --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro {imageID} bash
cd /your_code_path/mistral-small_pytorch
pip install "mistral_common>=1.5.4"
```
### Anaconda (Method 3)
@@ -49,6 +51,11 @@ deepspeed: 0.14.2+das.opt2.dtk2504
```
`Tips: the dtk driver, python, torch, and other DCU-related tool versions above must match exactly, one to one.`
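To sanity-check that the versions line up, a minimal sketch (assuming the packages were installed via pip; the expected torch suffix is an assumption inferred from the deepspeed version shown above):

```bash
# Print installed versions; each DCU-related package should carry a matching dtk suffix.
python --version
python -c "import torch; print(torch.__version__)"
python -c "import deepspeed; print(deepspeed.__version__)"  # expect 0.14.2+das.opt2.dtk2504
```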
Other non-deep-learning libraries can be installed as follows:
```bash
pip install "mistral_common>=1.5.4"
```
## Dataset
@@ -77,28 +84,10 @@ For SFT training script examples, refer to the corresponding yaml files under `llama-factory/train_lora`.
Parameter descriptions are the same as in [Full-parameter fine-tuning](#全参微调).
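As a rough sketch of how such a yaml is launched (assuming LLaMA-Factory's CLI is installed; the yaml filename below is a hypothetical placeholder for the actual file under `llama-factory/train_lora`):

```bash
# Hypothetical LoRA SFT launch; substitute the real yaml from llama-factory/train_lora.
llamafactory-cli train llama-factory/train_lora/mistral_small_lora_sft.yaml
```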
## Inference
### vLLM inference
#### Offline
```bash
python infer_vllm.py --model_name_or_path /path_of/model
```
#### Server
1. Start the server
```bash
vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 --tokenizer_mode mistral --config_format mistral --load_format mistral --tool-call-parser mistral --enable-auto-tool-choice --limit_mm_per_prompt 'image=10' -tp 2 --port 8001 --trust-remote-code --enforce-eager --served-model-name Mistral-Small
```
2. Test the results
```bash
# The HF_ENDPOINT environment variable must be set
export HF_ENDPOINT=https://hf-mirror.com
python infer_client.py
```
Not available yet.
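For a quick manual check of the server started in step 1, a minimal curl sketch (assuming the server listens on port 8001 with served model name `Mistral-Small`, as in the launch command above):

```bash
# Send a single chat completion request to the locally running vLLM server.
curl http://localhost:8001/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "Mistral-Small",
        "messages": [{"role": "user", "content": "Say hello in French."}],
        "temperature": 0.15
      }'
```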
## Result
<div align=center>
<img src="./doc/results.png"/>
</div>
Not available yet.
### Accuracy
DCU accuracy is consistent with GPU; inference framework: pytorch.
`infer_client.py` (deleted file)
import requests
import json
from huggingface_hub import hf_hub_download
from datetime import datetime, timedelta
# NOTE: the README starts the server on port 8001; adjust the port here to match your deployment.
url = "http://<your-server-url>:8000/v1/chat/completions"
headers = {"Content-Type": "application/json", "Authorization": "Bearer token"}
model = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
def load_system_prompt(repo_id: str, filename: str) -> str:
    # Download the system prompt template from the Hugging Face Hub and
    # fill in the model name plus the current and previous dates.
    file_path = hf_hub_download(repo_id=repo_id, filename=filename)
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    model_name = repo_id.split("/")[-1]
    return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
SYSTEM_PROMPT = load_system_prompt(model, "SYSTEM_PROMPT.txt")
image_url = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/europe.png"
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Which of the depicted countries has the best food? Which the second and third and fourth? Name the country, its color on the map and one its city that is visible on the map, but is not the capital. Make absolutely sure to only name a city that can be seen on the map.",
            },
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    },
]
data = {"model": model, "messages": messages, "temperature": 0.15}
response = requests.post(url, headers=headers, data=json.dumps(data))
print(response.json()["choices"][0]["message"]["content"])
`infer_vllm.py` (deleted file)
import argparse
from vllm import LLM
from vllm.sampling_params import SamplingParams
# This script demonstrates how to use vLLM to run inference with a Mistral model.
parser = argparse.ArgumentParser()
parser.add_argument("--user_prompt", type=str, default="Give me 5 non-formal ways to say 'See you later' in French.")
parser.add_argument("--model_name_or_path", type=str, default="mistralai/Mistral-Small-3.1-24B-Instruct-2503")
args = parser.parse_args()
# Define the system prompt for the conversational agent
SYSTEM_PROMPT = "You are a conversational agent that always answers straight to the point, always end your accurate response with an ASCII drawing of a cat."
messages = [
    {
        "role": "system",
        "content": SYSTEM_PROMPT
    },
    {
        "role": "user",
        "content": args.user_prompt
    },
]
# note that running this model on GPU requires over 60 GB of GPU RAM
llm = LLM(model=args.model_name_or_path, tokenizer_mode="mistral")
sampling_params = SamplingParams(max_tokens=512, temperature=0.15)
outputs = llm.chat(messages, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)
@@ -5,6 +5,6 @@ modelName=mistral-small_pytorch
# Model description
modelDescription=Mistral Small 3.1 (25.03) is a versatile model designed for a wide range of tasks such as programming, mathematical reasoning, document understanding, and dialogue.
# Application scenarios
appScenario=Inference,Training,Dialogue Q&A,Manufacturing,Broadcast media,Home,Education
appScenario=Training,Dialogue Q&A,Manufacturing,Broadcast media,Home,Education
# Framework type
frameType=pytorch