Commit 47df8a3f authored by chenych's avatar chenych
Browse files

Update README.

parent ff743322
......@@ -18,31 +18,43 @@ PaddleOCR-VL 将复杂的文档解析任务分解为两个阶段。第一阶段
### 硬件需求
DCU型号:K100AI,节点数量:1台,卡数:1张。
`-v 挂载路径`根据实际情况修改
`-v 挂载路径`和`docker_name`根据实际情况修改
### Docker(方法一)
```bash
docker pull image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10
docker run -it --shm-size 200g --network=host --name paddleocr-vl --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10 bash
docker run -it --shm-size 200g --network=host --name {docker_name} --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10 bash
cd /your_code_path/paddleocr-vl_paddle
python -m pip install paddlepaddle-dcu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/dcu/
python -m pip install -U "paddleocr[doc-parser]"
python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl
pip install paddlex==3.3.8
# /your/env/path/of/ 可以通过pip show paddlex命令查看
cp replace/__init__.py /your/env/path/of/paddlex/inference/genai/
cp replace/server.py /your/env/path/of/paddlex/inference/genai/
cp replace/vllm.py /your/env/path/of/paddlex/inference/genai/backends/
cp replace/paddleocr_vl_09b.py /your/env/path/of/paddlex/inference/genai/configs/
```
### Dockerfile(方法二)
```bash
cd docker
docker build --no-cache -t paddleocr-vl-dcu:latest .
docker build --no-cache -t paddleocr-vl:latest .
docker run -it --shm-size 200g --network=host --name paddleocr-vl --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10 bash
docker run -it --shm-size 200g --network=host --name {docker_name} --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10 bash
cd /your_code_path/paddleocr-vl_paddle
python -m pip install paddlepaddle-dcu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/dcu/
python -m pip install -U "paddleocr[doc-parser]"
python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl
pip install paddlex==3.3.8
# /your/env/path/of/ 可以通过pip show paddlex命令查看
cp replace/__init__.py /your/env/path/of/paddlex/inference/genai/
cp replace/server.py /your/env/path/of/paddlex/inference/genai/
cp replace/vllm.py /your/env/path/of/paddlex/inference/genai/backends/
cp replace/paddleocr_vl_09b.py /your/env/path/of/paddlex/inference/genai/configs/
```
### Anaconda(方法三)
......@@ -59,6 +71,12 @@ pip install -r requirements.txt
python -m pip install paddlepaddle-dcu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/dcu/
python -m pip install -U "paddleocr[doc-parser]"
python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl
pip install paddlex==3.3.8
# /your/env/path/of/ 可以通过pip show paddlex命令查看
cp replace/__init__.py /your/env/path/of/paddlex/inference/genai/
cp replace/server.py /your/env/path/of/paddlex/inference/genai/
cp replace/vllm.py /your/env/path/of/paddlex/inference/genai/backends/
cp replace/paddleocr_vl_09b.py /your/env/path/of/paddlex/inference/genai/configs/
```
## 数据集
......
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ...utils.deps import require_genai_engine_plugin
# NOTE(review): the upstream plugin-availability check is intentionally
# disabled for this DCU adaptation — the genai engine plugin package is not
# required/available on this platform. Confirm before re-enabling.
# require_genai_engine_plugin()
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def get_config(backend):
    """Return the default server configuration for the given inference backend.

    Args:
        backend: Backend identifier; one of "fastdeploy", "vllm", or "sglang".

    Returns:
        A dict mapping backend-specific CLI option names to their default
        values.

    Raises:
        ValueError: If ``backend`` is not one of the supported backends.
    """
    defaults = {
        "fastdeploy": {
            "gpu-memory-utilization": 0.7,
            "max-model-len": 16384,
            "max-num-batched-tokens": 16384,
            "max-num-seqs": 256,
            "workers": 2,
            "graph-optimization-config": '{"graph_opt_level":0, "use_cudagraph":true}',
        },
        "vllm": {
            "trust-remote-code": True,
            "gpu-memory-utilization": 0.5,
            "max-model-len": 16384,
            "max-num-batched-tokens": 131072,
            # "api-server-count": 4,  # kept disabled, as in the original
        },
        "sglang": {
            "trust-remote-code": True,
            "mem-fraction-static": 0.5,
            "context-length": 16384,
            "max-prefill-tokens": 131072,
        },
    }
    try:
        return defaults[backend]
    except KeyError:
        raise ValueError(f"Unsupported backend: {backend}") from None
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import sys
from ...utils import logging
from ...utils.deps import is_genai_engine_plugin_available
from .configs.utils import load_backend_config, update_backend_config
from .constants import DEFAULT_BACKEND, SUPPORTED_BACKENDS
from .models import get_chat_template_path, get_default_config, get_model_dir
def get_arg_parser():
    """Build the command-line argument parser for the generative AI server.

    Returns:
        An ``argparse.ArgumentParser`` accepting model, network, and backend
        options. ``--model_name`` is the only required argument.
    """
    arg_parser = argparse.ArgumentParser("PaddleX generative AI server.")
    # Required model identity; model_dir is optional and resolved later.
    arg_parser.add_argument("--model_name", type=str, required=True)
    arg_parser.add_argument("--model_dir", type=str)
    # Network binding for the HTTP server.
    arg_parser.add_argument("--host", type=str, default="localhost")
    arg_parser.add_argument("--port", type=int, default=8000)
    # Inference engine selection, restricted to the supported set.
    arg_parser.add_argument(
        "--backend",
        type=str,
        choices=SUPPORTED_BACKENDS,
        default=DEFAULT_BACKEND,
    )
    arg_parser.add_argument(
        "--backend_config", type=str, help="Path to the backend configuration file."
    )
    return arg_parser
def run_genai_server(args):
    """Resolve the model directory and backend configuration, then launch the
    selected backend's server with the parsed CLI ``args``.

    Exits the process (status 1) on any resolution/configuration failure.
    """
    plugin_name = f"{args.backend}-server"
    # NOTE(review): the upstream plugin-availability gate is intentionally
    # disabled for this DCU adaptation — confirm before re-enabling.
    # if not is_genai_engine_plugin_available(plugin_name):
    #     logging.error(
    #         f"The '{plugin_name}' plugin is not available. Please install it first."
    #     )
    #     sys.exit(1)
    # Lazily import only the chosen backend's runner to avoid pulling in
    # heavy engine dependencies for backends that are not used.
    if args.backend == "fastdeploy":
        from .backends.fastdeploy import run_fastdeploy_server
        run_server_func = run_fastdeploy_server
    elif args.backend == "vllm":
        from .backends.vllm import run_vllm_server
        run_server_func = run_vllm_server
    elif args.backend == "sglang":
        from .backends.sglang import run_sglang_server
        run_server_func = run_sglang_server
    else:
        # Unreachable: argparse restricts --backend to SUPPORTED_BACKENDS.
        raise AssertionError
    # An explicit --model_dir wins; otherwise derive it from the model name.
    if args.model_dir:
        model_dir = args.model_dir
    else:
        try:
            model_dir = get_model_dir(args.model_name, args.backend)
        except Exception:
            logging.error("Failed to get model directory", exc_info=True)
            sys.exit(1)
    # User-supplied backend config file is optional; start from empty if absent.
    if args.backend_config:
        try:
            backend_config = load_backend_config(args.backend_config)
        except Exception:
            logging.error(
                f"Failed to load backend configuration from file: {args.backend_config}",
                exc_info=True,
            )
            sys.exit(1)
    else:
        backend_config = {}
    try:
        default_config = get_default_config(args.model_name, args.backend)
    except Exception:
        logging.error(
            f"Failed to get default configuration for the model", exc_info=True
        )
        sys.exit(1)
    # Overlay the user config onto the model defaults (user values win),
    # then use the merged result as the effective backend config.
    update_backend_config(
        default_config,
        backend_config,
    )
    backend_config = default_config
    # The chat template path is provided via a context manager — presumably it
    # may materialize a temporary file that must outlive the server launch.
    with get_chat_template_path(
        args.model_name, args.backend, model_dir
    ) as chat_template_path:
        run_server_func(
            args.host,
            args.port,
            args.model_name,
            model_dir,
            backend_config,
            chat_template_path,
        )
def main(args=None):
    """CLI entry point: parse arguments (from ``args`` or ``sys.argv``) and
    launch the generative AI server."""
    parsed = get_arg_parser().parse_args(args=args)
    run_genai_server(parsed)
# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    main()
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ....utils.deps import is_genai_engine_plugin_available, require_genai_engine_plugin
from ..configs.utils import (
backend_config_to_args,
set_config_defaults,
update_backend_config,
)
from ..models import ALL_MODEL_NAMES, get_model_components
def register_models():
    """Register PaddleX model architectures with vLLM's ``ModelRegistry``.

    For every known model name not already supported by vLLM, look up the
    vLLM network class for that model and register it under the class name so
    vLLM can serve it.
    """
    from vllm import ModelRegistry

    # NOTE(review): the upstream plugin-availability gate is intentionally
    # disabled for this DCU adaptation, so registration is unconditional.
    # The previous `if True:` scaffold is removed as dead code.
    # if is_genai_engine_plugin_available("vllm-server"):
    for model_name in ALL_MODEL_NAMES:
        if model_name not in ModelRegistry.get_supported_archs():
            net_cls, _ = get_model_components(model_name, "vllm")
            ModelRegistry.register_model(net_cls.__name__, net_cls)
def run_vllm_server(host, port, model_name, model_dir, config, chat_template_path):
    """Launch a vLLM OpenAI-compatible API server for the given model.

    ``config`` is a backend config dict (option-name -> value); ``host``/
    ``port``/``model_dir`` always override it, while the served model name and
    chat template are only applied as defaults.
    """
    # NOTE(review): plugin check intentionally disabled for this DCU port.
    # require_genai_engine_plugin("vllm-server")
    import uvloop
    from vllm.entrypoints.openai.api_server import (
        FlexibleArgumentParser,
        cli_env_setup,
        make_arg_parser,
        run_server,
        validate_parsed_serve_args,
    )
    cli_env_setup()
    # Build vLLM's own CLI parser so the config can be passed as argv-style
    # flags and validated by vLLM itself.
    parser = FlexibleArgumentParser()
    parser = make_arg_parser(parser)
    # Defaults: only set if the user config did not already provide them.
    set_config_defaults(config, {"served-model-name": model_name})
    if chat_template_path:
        set_config_defaults(config, {"chat-template": str(chat_template_path)})
    # Hard overrides: the caller's binding and model location always win.
    update_backend_config(
        config,
        {
            "model": model_dir,
            "host": host,
            "port": port,
        },
    )
    # Convert the merged config dict to argv flags, parse, validate, and run
    # the async server on uvloop.
    args = backend_config_to_args(config)
    args = parser.parse_args(args)
    validate_parsed_serve_args(args)
    uvloop.run(run_server(args))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment