"composable_kernel/include/utility/utility.hpp" did not exist on "d6d9a8e4cee89feef6758f825cfea1588fec16da"
Commit 20f4e124 authored by chenych

Update GLM5

parent dc06c77b
@@ -20,7 +20,7 @@
```diff
 docker run -it \
-  --shm-size 60g \
+  --shm-size 200g \
   --network=host \
   --name glm-5 \
   --privileged \
```
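The commit raises the container's shared-memory size from 60g to 200g. Docker size strings can be sanity-checked programmatically; a minimal sketch (the `parse_size` helper is ours, not part of Docker, and assumes the binary `k`/`m`/`g` suffixes Docker accepts):

```python
def parse_size(value: str) -> int:
    """Convert a Docker-style size string such as '200g' to bytes.

    Hypothetical helper for sanity-checking --shm-size values;
    supports the k/m/g suffixes (binary multiples). A bare number
    is treated as a plain byte count.
    """
    units = {"k": 1024, "m": 1024 ** 2, "g": 1024 ** 3}
    value = value.strip().lower()
    if value and value[-1] in units:
        return int(value[:-1]) * units[value[-1]]
    return int(value)

# The commit raises shared memory from 60g to 200g:
assert parse_size("200g") > parse_size("60g")
```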
@@ -110,21 +110,24 @@ ray start --address='x.x.x.x:6379' --num-gpus=8 --num-cpus=32
3. Start the vllm server
```diff
 vllm serve zai-org/GLM-5 \
-  --port 8001 \
-  --trust-remote-code \
-  --tensor-parallel-size 32 \
-  --gpu-memory-utilization 0.85 \
-  --speculative-config.method mtp \
-  --speculative-config.num_speculative_tokens 1 \
-  --tool-call-parser glm47 \
-  --reasoning-parser glm45 \
-  --enable-auto-tool-choice \
-  --served-model-name glm-5
+  --port 8001 \
+  --trust-remote-code \
+  --tensor-parallel-size 32 \
+  --gpu-memory-utilization 0.85 \
+  --distributed-executor-backend ray \
+  --dtype bfloat16 \
+  --max-model-len 32768 \
+  --speculative-config.method mtp \
+  --speculative-config.num_speculative_tokens 1 \
+  --tool-call-parser glm47 \
+  --reasoning-parser glm45 \
+  --enable-auto-tool-choice \
+  --served-model-name glm-5
```
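With `--tensor-parallel-size 32` and nodes started with `--num-gpus=8` (as in the `ray start` command above), one tensor-parallel group spans four ray nodes. A minimal sketch of that arithmetic (the `nodes_needed` helper is illustrative, not a vLLM API):

```python
import math

def nodes_needed(tensor_parallel_size: int, gpus_per_node: int) -> int:
    """Number of ray worker nodes required to supply the GPUs for one
    tensor-parallel group (illustrative helper, not a vLLM API)."""
    return math.ceil(tensor_parallel_size / gpus_per_node)

# --tensor-parallel-size 32 across nodes started with --num-gpus=8
print(nodes_needed(32, 8))  # → 4
```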
Once the server has started, it can be accessed as follows:
```diff
-curl http://localhost:8001/v1/chat/completions \
+curl http://12.12.12.83:8001/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
     "model": "glm-5",
@@ -132,14 +135,14 @@ curl http://localhost:8001/v1/chat/completions \
     "messages": [
       {"role": "system", "content": "You are a helpful assistant."},
       {"role": "user", "content": "Summarize GLM-5 in one sentence."}
     ],
-    "max_tokens": 4096,
-    "temperature": 1
+    "max_tokens": 200,
+    "temperature": 0.7
   }'
```
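The curl request above can also be issued from Python. A minimal sketch using only the standard library (the `build_chat_payload` helper and the host address are illustrative; any OpenAI-compatible client would work just as well):

```python
import json

def build_chat_payload(user_msg: str, max_tokens: int = 200,
                       temperature: float = 0.7) -> str:
    """Build the JSON body for the /v1/chat/completions request.
    Illustrative helper; field values mirror the curl example above."""
    return json.dumps({
        "model": "glm-5",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_msg},
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
    })

body = build_chat_payload("Summarize GLM-5 in one sentence.")
# POST it with e.g. urllib.request.Request(
#     "http://12.12.12.83:8001/v1/chat/completions",
#     data=body.encode(),
#     headers={"Content-Type": "application/json"})
```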
## Results
 <div align=center>
-<img src="./doc/xxx.png"/>
+<img src="./doc/result.png"/>
 </div>
### Accuracy
......
doc/result.png (31.7 KB → 104 KB)