Commit ba8c0ea1 authored by shihm

update code

parent c9602254
...@@ -20,7 +20,7 @@ Baichuan-M3 is Baichuan Intelligence's new-generation medically enhanced large language model,
| transformers | 4.57.6 |
| vllm | 0.11.0+das.opt1.rc2.dtk2604.20260128.g0bf89b0c |
Recommended image: harbor.sourcefind.cn:5443/dcu/admin/base/vllm:0.11.0-ubuntu22.04-dtk26.04-0130-py3.10-20260204
- Adjust the `-v` mount paths to match the actual location of your model
...@@ -39,7 +39,7 @@ docker run -it \
-u root \
-v /opt/hyhal/:/opt/hyhal/:ro \
-v /path/your_code_data/:/path/your_code_data/ \
harbor.sourcefind.cn:5443/dcu/admin/base/vllm:0.11.0-ubuntu22.04-dtk26.04-0130-py3.10-20260204 bash
```
More images are available for download from [光源](https://sourcefind.cn/#/service-list).
...@@ -83,6 +83,83 @@ curl http://localhost:8000/v1/chat/completions \
<img src="./doc/result.png"/>
</div>
#### Multi-node Inference
Set the following environment variables:
```bash
export ALLREDUCE_STREAM_WITH_COMPUTE=1
export VLLM_HOST_IP=x.x.x.x # IP of this compute node: the address bound to the IB interface named by SOCKET_IFNAME
export NCCL_SOCKET_IFNAME=ibxxxx
export GLOO_SOCKET_IFNAME=ibxxxx
export NCCL_IB_HCA=mlx5_0:1 # name of the IB NIC in this environment
unset NCCL_ALGO
export NCCL_MIN_NCHANNELS=16
export NCCL_MAX_NCHANNELS=16
export NCCL_NET_GDR_READ=1
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export VLLM_SPEC_DECODE_EAGER=1
export VLLM_MLA_DISABLE=0
export VLLM_USE_FLASH_MLA=1
export VLLM_RPC_TIMEOUT=1800000
# Additional environment variables recommended on K100_AI clusters:
export VLLM_ENFORCE_EAGER_BS_THRESHOLD=44
# Hygon CPU NUMA core binding
export VLLM_NUMA_BIND=1
export VLLM_RANK0_NUMA=0
export VLLM_RANK1_NUMA=1
export VLLM_RANK2_NUMA=2
export VLLM_RANK3_NUMA=3
export VLLM_RANK4_NUMA=4
export VLLM_RANK5_NUMA=5
export VLLM_RANK6_NUMA=6
export VLLM_RANK7_NUMA=7
```
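The eight `VLLM_RANK*_NUMA` exports above encode a 1:1 rank-to-NUMA-node mapping; on an 8-rank node they can also be generated in a loop (a sketch, assuming NUMA nodes are numbered 0-7 like the ranks):

```shell
# Bind each of the 8 vLLM ranks to the NUMA node with the same index,
# matching the VLLM_RANK0_NUMA..VLLM_RANK7_NUMA exports listed above.
for rank in 0 1 2 3 4 5 6 7; do
  export "VLLM_RANK${rank}_NUMA=${rank}"
done
```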
Start the Ray cluster.
x.x.x.x corresponds to the head node's VLLM_HOST_IP set in the previous step.
```bash
# run on the head node
ray start --head --node-ip-address=x.x.x.x --port=6379 --num-gpus=8 --num-cpus=32
# run on each worker node
ray start --address='x.x.x.x:6379' --num-gpus=8 --num-cpus=32
```
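The head and worker commands must agree on the same address and port; a small sketch that derives the worker's `--address` from one variable (the `HEAD_IP` name is hypothetical; substitute the head node's real IB-interface IP):

```shell
# Single source of truth for the cluster address: set HEAD_IP once and
# reuse it on both nodes so the worker joins the right head.
HEAD_IP=x.x.x.x                 # head node's VLLM_HOST_IP
RAY_ADDRESS="${HEAD_IP}:6379"   # worker join target

# head node:
#   ray start --head --node-ip-address="$HEAD_IP" --port=6379 --num-gpus=8 --num-cpus=32
# worker node:
#   ray start --address="$RAY_ADDRESS" --num-gpus=8 --num-cpus=32
echo "$RAY_ADDRESS"
# prints: x.x.x.x:6379
```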
Start the vLLM server:
```bash
vllm serve /path/to/baichuan-inc/Baichuan-M3-235B \
    --host x.x.x.x --port 8000 \
    --distributed-executor-backend ray \
    --tensor-parallel-size 8 \
    --pipeline-parallel-size 2 \
    --max-model-len 32768 \
    --gpu-memory-utilization 0.9 \
    --served-model-name baichuan-m3 \
    --reasoning-parser deepseek_r1
```
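On first start the server needs time to load the 235B weights, so requests sent too early will fail; a minimal readiness sketch that polls vLLM's OpenAI-compatible `/v1/models` route (the helper name and retry parameters are illustrative):

```shell
# Poll the server's model-list route until it answers (server ready)
# or max_tries attempts have failed; returns 0 on ready, 1 on timeout.
wait_for_vllm() {
  url="$1"; max_tries="${2:-60}"; delay="${3:-5}"
  i=0
  while [ "$i" -lt "$max_tries" ]; do
    if curl -sf "${url}/v1/models" > /dev/null 2>&1; then
      return 0
    fi
    i=$((i + 1))
    sleep "$delay"
  done
  return 1
}
```

Usage: `wait_for_vllm http://localhost:8000 && echo "server ready"`.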
Once the server is up, it can be accessed as follows:
```bash
curl http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "baichuan-m3",
"messages": [
{
"role": "user",
"content": "下午头痛怎么办?"
}
]
}'
```
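The reply text is nested at `.choices[0].message.content` in the response JSON; with `jq` installed (an assumption) it can be pulled out directly, shown here against a canned response standing in for the server's output:

```shell
# Extract the assistant's text from an OpenAI-style chat completion
# response; the canned JSON below mimics the server's response shape.
response='{"choices":[{"message":{"role":"assistant","content":"hello"}}]}'
echo "$response" | jq -r '.choices[0].message.content'
# prints: hello
```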
## Results
<div align=center>
<img src="./doc/result1.png"/>
</div>
### transformers
```python
...
```