Unverified Commit 13c23c47 authored by Xiaomeng Zhao, committed by GitHub

Merge pull request #2681 from opendatalab/dev

Dev
parents f34644eb 039cf27f
@@ -51,6 +51,9 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
</div>
# Changelog
- 2025/06/17 2.0.4 Released
  - Fixed the issue where models were still required to be downloaded in `sglang-client` mode
  - Fixed the issue where only the first instance would take effect when attempting to launch multiple `sglang-client` instances via multiple URLs within the same process
- 2025/06/15 2.0.3 released
  - Fixed a configuration file key-value update error that occurred when downloading model type was set to `all`
  - Fixed the issue where the formula and table feature toggle switches were not working in `command line mode`, causing the features to remain enabled.
@@ -533,6 +536,14 @@ If you need to use **sglang to accelerate VLM model inference**, you can choose
> [!TIP]
> The Dockerfile uses `lmsysorg/sglang:v0.4.7-cu124` as the default base image. If necessary, you can modify it to another platform version.
#### 1.4 Install client (for connecting to sglang-server on edge devices that require only CPU and network connectivity)
```bash
uv pip install -U mineru
mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://<host_ip>:<port>
```
---
### 2. Using MinerU
...
@@ -50,6 +50,9 @@
</div>
# Changelog
- 2025/06/17 2.0.4 Released
  - Fixed the issue where models were still required to be downloaded in `sglang-client` mode
  - Fixed the issue where only the first instance would take effect when attempting to launch multiple `sglang-client` instances via multiple URLs within the same process
- 2025/06/15 2.0.3 Released
  - Fixed a configuration file key-value update error that occurred when the model download type was set to `all`
  - Fixed the issue where the formula and table feature toggles did not take effect in command line mode, leaving those features impossible to disable
@@ -522,6 +525,13 @@ uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple
> [!TIP]
> The Dockerfile uses `lmsysorg/sglang:v0.4.7-cu124` as the default base image. If necessary, you can modify it to another platform version.
#### 1.4 Install client (for connecting to sglang-server on edge devices that require only CPU and network connectivity)
```bash
uv pip install -U mineru -i https://mirrors.aliyun.com/pypi/simple
mineru -p <input_path> -o <output_path> -b vlm-sglang-client -u http://<host_ip>:<port>
```
---
...
@@ -27,7 +27,7 @@ class ModelSingleton:
        model_path: str | None,
        server_url: str | None,
    ) -> BasePredictor:
-        key = (backend, server_url)
+        key = (backend, model_path, server_url)
        if key not in self._models:
            if backend in ['transformers', 'sglang-engine'] and not model_path:
                model_path = auto_download_and_get_model_root_path("/","vlm")
...
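This hunk widens the predictor cache key so a cached instance is reused only when backend, model path, and server URL all match, which is what lets several differently configured backends coexist in one process (see the 2.0.4 changelog entries above). Below is a minimal, self-contained sketch of the keyed-singleton pattern; `PredictorCache` and `DummyPredictor` are illustrative stand-ins for the real `ModelSingleton`/`BasePredictor`, not MinerU's actual classes:

```python
from typing import Optional


class DummyPredictor:
    """Stand-in for a real predictor; only records how it was configured."""

    def __init__(self, backend: str, model_path: Optional[str], server_url: Optional[str]):
        self.backend = backend
        self.model_path = model_path
        self.server_url = server_url


class PredictorCache:
    """Keyed singleton: one predictor per (backend, model_path, server_url) tuple."""

    def __init__(self):
        self._models = {}

    def get_model(self, backend: str, model_path: Optional[str] = None,
                  server_url: Optional[str] = None) -> DummyPredictor:
        key = (backend, model_path, server_url)
        if key not in self._models:
            # Only built once per unique configuration; later calls reuse it.
            self._models[key] = DummyPredictor(backend, model_path, server_url)
        return self._models[key]


cache = PredictorCache()
a = cache.get_model("sglang-client", server_url="http://10.0.0.1:30000")
b = cache.get_model("sglang-client", server_url="http://10.0.0.2:30000")
assert a is not b  # different URLs -> distinct client instances
assert a is cache.get_model("sglang-client", server_url="http://10.0.0.1:30000")
```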
@@ -21,7 +21,7 @@ class MathDataset(Dataset):
class UnimernetModel(object):
    def __init__(self, weight_dir, _device_="cpu"):
        from .unimernet_hf import UnimernetModel
-        if _device_.startswith("mps"):
+        if _device_.startswith("mps") or _device_.startswith("npu"):
            self.model = UnimernetModel.from_pretrained(weight_dir, attn_implementation="eager")
        else:
            self.model = UnimernetModel.from_pretrained(weight_dir)
...
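This hunk extends the eager-attention fallback from Apple `mps` to Ascend `npu` devices, where the fused/SDPA attention kernels used by the default code path may be unavailable. A minimal sketch of the same device-dependent loading pattern with a generic Hugging Face model; `load_model` and the use of `AutoModel` here are illustrative assumptions, not MinerU's API:

```python
from transformers import AutoModel


def load_model(weight_dir: str, device: str = "cpu"):
    """Pick the attention implementation based on the target device.

    mps (Apple Silicon) and npu (Ascend) may lack the fused/SDPA attention
    kernels, so fall back to the plain "eager" implementation there; other
    devices keep the library default.
    """
    if device.startswith(("mps", "npu")):
        model = AutoModel.from_pretrained(weight_dir, attn_implementation="eager")
    else:
        model = AutoModel.from_pretrained(weight_dir)
    return model.to(device).eval()
```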
@@ -6,10 +6,26 @@ from sglang.srt.entrypoints.http_server import app, generate_request, launch_ser
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.server_args import prepare_server_args
from sglang.srt.utils import kill_process_tree
from sglang.srt.conversation import Conversation
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
from .logit_processor import Mineru2LogitProcessor
# mineru2.0's chat_template differs slightly from chatml in its line breaks
def custom_get_prompt(self) -> str:
    system_prompt = self.system_template.format(system_message=self.system_message)
    if self.system_message == "":
        ret = ""
    else:
        ret = system_prompt + self.sep
    for role, message in self.messages:
        if message:
            ret += role + "\n" + message + self.sep
        else:
            ret += role + "\n"
    return ret
_custom_logit_processor_str = Mineru2LogitProcessor().to_str()
# remove the existing /generate route
@@ -45,6 +61,7 @@ def main():
    if server_args.chat_template is None:
        server_args.chat_template = "chatml"
    Conversation.get_prompt = custom_get_prompt
    server_args.enable_custom_logit_processor = True
...
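The patch monkey-patches `Conversation.get_prompt` so the server renders prompts with mineru2.0's exact newline placement instead of the stock chatml rendering. A standalone sketch that mimics the patched template in plain Python (no sglang objects); the role tags and separator string below are assumptions for illustration only:

```python
# Mimic the patched get_prompt(): optional system block, then each
# (role, message) turn as "role\nmessage<sep>", and a trailing bare "role\n"
# that cues the model to generate the next (assistant) turn.
SEP = "<|im_end|>\n"                                # chatml-style separator (assumed)
SYSTEM_TEMPLATE = "<|im_start|>system\n{system_message}"


def render_prompt(system_message: str, messages: list[tuple[str, str | None]]) -> str:
    ret = "" if system_message == "" else SYSTEM_TEMPLATE.format(system_message=system_message) + SEP
    for role, message in messages:
        if message:
            ret += role + "\n" + message + SEP
        else:
            ret += role + "\n"
    return ret


print(render_prompt(
    "You are a document parsing assistant.",
    [("<|im_start|>user", "Extract the tables from this page."),
     ("<|im_start|>assistant", None)],               # empty turn -> generation starts here
))
```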