Commit 47df8a3f authored by chenych's avatar chenych
Browse files

Update README.

parent ff743322
......@@ -18,31 +18,43 @@ PaddleOCR-VL 将复杂的文档解析任务分解为两个阶段。第一阶段
### 硬件需求
DCU型号:K100AI,节点数量:1台,卡数:1张。
`-v 挂载路径`根据实际情况修改
`-v 挂载路径`和`docker_name`根据实际情况修改
### Docker(方法一)
```bash
docker pull image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10
docker run -it --shm-size 200g --network=host --name paddleocr-vl --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10 bash
docker run -it --shm-size 200g --network=host --name {docker_name} --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10 bash
cd /your_code_path/paddleocr-vl_paddle
python -m pip install paddlepaddle-dcu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/dcu/
python -m pip install -U "paddleocr[doc-parser]"
python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl
pip install paddlex==3.3.8
# /your/env/path/of/ 可以通过pip show paddlex命令查看
cp replace/__init__.py /your/env/path/of/paddlex/inference/genai/
cp replace/server.py /your/env/path/of/paddlex/inference/genai/
cp replace/vllm.py /your/env/path/of/paddlex/inference/genai/backends/
cp replace/paddleocr_vl_09b.py /your/env/path/of/paddlex/inference/genai/configs/
```
### Dockerfile(方法二)
```bash
cd docker
docker build --no-cache -t paddleocr-vl-dcu:latest .
docker build --no-cache -t paddleocr-vl:latest .
docker run -it --shm-size 200g --network=host --name paddleocr-vl --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10 bash
docker run -it --shm-size 200g --network=host --name {docker_name} --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/:ro image.sourcefind.cn:5000/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-py3.10 bash
cd /your_code_path/paddleocr-vl_paddle
python -m pip install paddlepaddle-dcu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/dcu/
python -m pip install -U "paddleocr[doc-parser]"
python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl
pip install paddlex==3.3.8
# /your/env/path/of/ 可以通过pip show paddlex命令查看
cp replace/__init__.py /your/env/path/of/paddlex/inference/genai/
cp replace/server.py /your/env/path/of/paddlex/inference/genai/
cp replace/vllm.py /your/env/path/of/paddlex/inference/genai/backends/
cp replace/paddleocr_vl_09b.py /your/env/path/of/paddlex/inference/genai/configs/
```
### Anaconda(方法三)
......@@ -59,6 +71,12 @@ pip install -r requirements.txt
python -m pip install paddlepaddle-dcu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/dcu/
python -m pip install -U "paddleocr[doc-parser]"
python -m pip install https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl
pip install paddlex==3.3.8
# /your/env/path/of/ 可以通过pip show paddlex命令查看
cp replace/__init__.py /your/env/path/of/paddlex/inference/genai/
cp replace/server.py /your/env/path/of/paddlex/inference/genai/
cp replace/vllm.py /your/env/path/of/paddlex/inference/genai/backends/
cp replace/paddleocr_vl_09b.py /your/env/path/of/paddlex/inference/genai/configs/
```
## 数据集
......
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ...utils.deps import require_genai_engine_plugin
# NOTE(review): the upstream plugin-availability check is intentionally
# disabled for this DCU adaptation — the genai engine plugin package is not
# required/available on this platform. Confirm before re-enabling.
# require_genai_engine_plugin()
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def get_config(backend):
    """Return the default server configuration for the given inference backend.

    Args:
        backend: Backend identifier; one of "fastdeploy", "vllm", or "sglang".

    Returns:
        A dict mapping backend-specific CLI option names to their default
        values.

    Raises:
        ValueError: If ``backend`` is not one of the supported backends.
    """
    defaults = {
        "fastdeploy": {
            "gpu-memory-utilization": 0.7,
            "max-model-len": 16384,
            "max-num-batched-tokens": 16384,
            "max-num-seqs": 256,
            "workers": 2,
            "graph-optimization-config": '{"graph_opt_level":0, "use_cudagraph":true}',
        },
        "vllm": {
            "trust-remote-code": True,
            "gpu-memory-utilization": 0.5,
            "max-model-len": 16384,
            "max-num-batched-tokens": 131072,
            # "api-server-count": 4,  # kept disabled, as in the original
        },
        "sglang": {
            "trust-remote-code": True,
            "mem-fraction-static": 0.5,
            "context-length": 16384,
            "max-prefill-tokens": 131072,
        },
    }
    try:
        return defaults[backend]
    except KeyError:
        raise ValueError(f"Unsupported backend: {backend}") from None
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import sys
from ...utils import logging
from ...utils.deps import is_genai_engine_plugin_available
from .configs.utils import load_backend_config, update_backend_config
from .constants import DEFAULT_BACKEND, SUPPORTED_BACKENDS
from .models import get_chat_template_path, get_default_config, get_model_dir
def get_arg_parser():
    """Build the command-line argument parser for the generative AI server.

    Returns:
        An ``argparse.ArgumentParser`` accepting model, network, and backend
        options. ``--model_name`` is the only required argument.
    """
    arg_parser = argparse.ArgumentParser("PaddleX generative AI server.")
    # Required model identity; model_dir is optional and resolved later.
    arg_parser.add_argument("--model_name", type=str, required=True)
    arg_parser.add_argument("--model_dir", type=str)
    # Network binding for the HTTP server.
    arg_parser.add_argument("--host", type=str, default="localhost")
    arg_parser.add_argument("--port", type=int, default=8000)
    # Inference engine selection, restricted to the supported set.
    arg_parser.add_argument(
        "--backend",
        type=str,
        choices=SUPPORTED_BACKENDS,
        default=DEFAULT_BACKEND,
    )
    arg_parser.add_argument(
        "--backend_config", type=str, help="Path to the backend configuration file."
    )
    return arg_parser
def run_genai_server(args):
    """Resolve the model directory and backend configuration, then launch the
    selected backend's server with the parsed CLI ``args``.

    Exits the process (status 1) on any resolution/configuration failure.
    """
    plugin_name = f"{args.backend}-server"
    # NOTE(review): the upstream plugin-availability gate is intentionally
    # disabled for this DCU adaptation — confirm before re-enabling.
    # if not is_genai_engine_plugin_available(plugin_name):
    #     logging.error(
    #         f"The '{plugin_name}' plugin is not available. Please install it first."
    #     )
    #     sys.exit(1)
    # Lazily import only the chosen backend's runner to avoid pulling in
    # heavy engine dependencies for backends that are not used.
    if args.backend == "fastdeploy":
        from .backends.fastdeploy import run_fastdeploy_server
        run_server_func = run_fastdeploy_server
    elif args.backend == "vllm":
        from .backends.vllm import run_vllm_server
        run_server_func = run_vllm_server
    elif args.backend == "sglang":
        from .backends.sglang import run_sglang_server
        run_server_func = run_sglang_server
    else:
        # Unreachable: argparse restricts --backend to SUPPORTED_BACKENDS.
        raise AssertionError
    # An explicit --model_dir wins; otherwise derive it from the model name.
    if args.model_dir:
        model_dir = args.model_dir
    else:
        try:
            model_dir = get_model_dir(args.model_name, args.backend)
        except Exception:
            logging.error("Failed to get model directory", exc_info=True)
            sys.exit(1)
    # User-supplied backend config file is optional; start from empty if absent.
    if args.backend_config:
        try:
            backend_config = load_backend_config(args.backend_config)
        except Exception:
            logging.error(
                f"Failed to load backend configuration from file: {args.backend_config}",
                exc_info=True,
            )
            sys.exit(1)
    else:
        backend_config = {}
    try:
        default_config = get_default_config(args.model_name, args.backend)
    except Exception:
        logging.error(
            f"Failed to get default configuration for the model", exc_info=True
        )
        sys.exit(1)
    # Overlay the user config onto the model defaults (user values win),
    # then use the merged result as the effective backend config.
    update_backend_config(
        default_config,
        backend_config,
    )
    backend_config = default_config
    # The chat template path is provided via a context manager — presumably it
    # may materialize a temporary file that must outlive the server launch.
    with get_chat_template_path(
        args.model_name, args.backend, model_dir
    ) as chat_template_path:
        run_server_func(
            args.host,
            args.port,
            args.model_name,
            model_dir,
            backend_config,
            chat_template_path,
        )
def main(args=None):
    """CLI entry point: parse arguments (from ``args`` or ``sys.argv``) and
    launch the generative AI server."""
    parsed = get_arg_parser().parse_args(args=args)
    run_genai_server(parsed)
# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    main()
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ....utils.deps import is_genai_engine_plugin_available, require_genai_engine_plugin
from ..configs.utils import (
backend_config_to_args,
set_config_defaults,
update_backend_config,
)
from ..models import ALL_MODEL_NAMES, get_model_components
def register_models():
    """Register PaddleX model architectures with vLLM's ``ModelRegistry``.

    For every known model name not already supported by vLLM, look up the
    vLLM network class for that model and register it under the class name so
    vLLM can serve it.
    """
    from vllm import ModelRegistry

    # NOTE(review): the upstream plugin-availability gate is intentionally
    # disabled for this DCU adaptation, so registration is unconditional.
    # The previous `if True:` scaffold is removed as dead code.
    # if is_genai_engine_plugin_available("vllm-server"):
    for model_name in ALL_MODEL_NAMES:
        if model_name not in ModelRegistry.get_supported_archs():
            net_cls, _ = get_model_components(model_name, "vllm")
            ModelRegistry.register_model(net_cls.__name__, net_cls)
def run_vllm_server(host, port, model_name, model_dir, config, chat_template_path):
    """Launch a vLLM OpenAI-compatible API server for the given model.

    ``config`` is a backend config dict (option-name -> value); ``host``/
    ``port``/``model_dir`` always override it, while the served model name and
    chat template are only applied as defaults.
    """
    # NOTE(review): plugin check intentionally disabled for this DCU port.
    # require_genai_engine_plugin("vllm-server")
    import uvloop
    from vllm.entrypoints.openai.api_server import (
        FlexibleArgumentParser,
        cli_env_setup,
        make_arg_parser,
        run_server,
        validate_parsed_serve_args,
    )
    cli_env_setup()
    # Build vLLM's own CLI parser so the config can be passed as argv-style
    # flags and validated by vLLM itself.
    parser = FlexibleArgumentParser()
    parser = make_arg_parser(parser)
    # Defaults: only set if the user config did not already provide them.
    set_config_defaults(config, {"served-model-name": model_name})
    if chat_template_path:
        set_config_defaults(config, {"chat-template": str(chat_template_path)})
    # Hard overrides: the caller's binding and model location always win.
    update_backend_config(
        config,
        {
            "model": model_dir,
            "host": host,
            "port": port,
        },
    )
    # Convert the merged config dict to argv flags, parse, validate, and run
    # the async server on uvloop.
    args = backend_config_to_args(config)
    args = parser.parse_args(args)
    validate_parsed_serve_args(args)
    uvloop.run(run_server(args))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment