Commit e6f817fe authored by myhloli's avatar myhloli
Browse files

refactor: extract command-line argument parsing to cli_parser.py and update usage in main functions

parent 54913fc2
...@@ -4,6 +4,7 @@ import click ...@@ -4,6 +4,7 @@ import click
from pathlib import Path from pathlib import Path
from loguru import logger from loguru import logger
from mineru.utils.cli_parser import arg_parse
from mineru.utils.config_reader import get_device from mineru.utils.config_reader import get_device
from mineru.utils.model_utils import get_vram from mineru.utils.model_utils import get_vram
from ..version import __version__ from ..version import __version__
...@@ -145,42 +146,7 @@ def main( ...@@ -145,42 +146,7 @@ def main(
device_mode, virtual_vram, model_source, **kwargs device_mode, virtual_vram, model_source, **kwargs
): ):
# 解析额外参数 kwargs.update(arg_parse(ctx))
extra_kwargs = {}
i = 0
while i < len(ctx.args):
arg = ctx.args[i]
if arg.startswith('--'):
param_name = arg[2:].replace('-', '_') # 转换参数名格式
i += 1
if i < len(ctx.args) and not ctx.args[i].startswith('--'):
# 参数有值
try:
# 尝试转换为适当的类型
if ctx.args[i].lower() == 'true':
extra_kwargs[param_name] = True
elif ctx.args[i].lower() == 'false':
extra_kwargs[param_name] = False
elif '.' in ctx.args[i]:
try:
extra_kwargs[param_name] = float(ctx.args[i])
except ValueError:
extra_kwargs[param_name] = ctx.args[i]
else:
try:
extra_kwargs[param_name] = int(ctx.args[i])
except ValueError:
extra_kwargs[param_name] = ctx.args[i]
except:
extra_kwargs[param_name] = ctx.args[i]
else:
# 布尔型标志参数
extra_kwargs[param_name] = True
i -= 1
i += 1
# 将解析出的参数合并到kwargs
kwargs.update(extra_kwargs)
if not backend.endswith('-client'): if not backend.endswith('-client'):
def get_device_mode() -> str: def get_device_mode() -> str:
......
import uuid import uuid
import os import os
import uvicorn import uvicorn
import argparse import click
from pathlib import Path from pathlib import Path
from glob import glob from glob import glob
from fastapi import FastAPI, UploadFile, File, Form from fastapi import FastAPI, UploadFile, File, Form
...@@ -12,6 +12,7 @@ from loguru import logger ...@@ -12,6 +12,7 @@ from loguru import logger
from base64 import b64encode from base64 import b64encode
from mineru.cli.common import aio_do_parse, read_fn from mineru.cli.common import aio_do_parse, read_fn
from mineru.utils.cli_parser import arg_parse
from mineru.version import __version__ from mineru.version import __version__
app = FastAPI() app = FastAPI()
...@@ -50,6 +51,10 @@ async def parse_pdf( ...@@ -50,6 +51,10 @@ async def parse_pdf(
start_page_id: int = Form(0), start_page_id: int = Form(0),
end_page_id: int = Form(99999), end_page_id: int = Form(99999),
): ):
# 获取命令行配置参数
config = getattr(app.state, "config", {})
try: try:
# 创建唯一的输出目录 # 创建唯一的输出目录
unique_dir = os.path.join(output_dir, str(uuid.uuid4())) unique_dir = os.path.join(output_dir, str(uuid.uuid4()))
...@@ -113,6 +118,7 @@ async def parse_pdf( ...@@ -113,6 +118,7 @@ async def parse_pdf(
f_dump_content_list=return_content_list, f_dump_content_list=return_content_list,
start_page_id=start_page_id, start_page_id=start_page_id,
end_page_id=end_page_id, end_page_id=end_page_id,
**config
) )
# 构建结果路径 # 构建结果路径
...@@ -162,24 +168,29 @@ async def parse_pdf( ...@@ -162,24 +168,29 @@ async def parse_pdf(
) )
def main(): @click.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
"""启动MinerU FastAPI服务器的命令行入口""" @click.pass_context
parser = argparse.ArgumentParser(description='Start MinerU FastAPI Service') @click.option('--host', default='127.0.0.1', help='Server host (default: 127.0.0.1)')
parser.add_argument('--host', type=str, default='127.0.0.1', help='Server host (default: 127.0.0.1)') @click.option('--port', default=8000, type=int, help='Server port (default: 8000)')
parser.add_argument('--port', type=int, default=8000, help='Server port (default: 8000)') @click.option('--reload', is_flag=True, help='Enable auto-reload (development mode)')
parser.add_argument('--reload', action='store_true', help='Enable auto-reload (development mode)') def main(ctx, host, port, reload, **kwargs):
args = parser.parse_args()
print(f"Start MinerU FastAPI Service: http://{args.host}:{args.port}") kwargs.update(arg_parse(ctx))
# 将配置参数存储到应用状态中
app.state.config = kwargs
"""启动MinerU FastAPI服务器的命令行入口"""
print(f"Start MinerU FastAPI Service: http://{host}:{port}")
print("The API documentation can be accessed at the following address:") print("The API documentation can be accessed at the following address:")
print(f"- Swagger UI: http://{args.host}:{args.port}/docs") print(f"- Swagger UI: http://{host}:{port}/docs")
print(f"- ReDoc: http://{args.host}:{args.port}/redoc") print(f"- ReDoc: http://{host}:{port}/redoc")
uvicorn.run( uvicorn.run(
"mineru.cli.fast_api:app", "mineru.cli.fast_api:app",
host=args.host, host=host,
port=args.port, port=port,
reload=args.reload reload=reload
) )
......
...@@ -13,6 +13,7 @@ from gradio_pdf import PDF ...@@ -13,6 +13,7 @@ from gradio_pdf import PDF
from loguru import logger from loguru import logger
from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes from mineru.cli.common import prepare_env, read_fn, aio_do_parse, pdf_suffixes, image_suffixes
from mineru.utils.cli_parser import arg_parse
from mineru.utils.hash_utils import str_sha256 from mineru.utils.hash_utils import str_sha256
...@@ -237,42 +238,8 @@ def main(ctx, ...@@ -237,42 +238,8 @@ def main(ctx,
example_enable, sglang_engine_enable, api_enable, max_convert_pages, example_enable, sglang_engine_enable, api_enable, max_convert_pages,
server_name, server_port, **kwargs server_name, server_port, **kwargs
): ):
# 解析额外参数
extra_kwargs = {} kwargs.update(arg_parse(ctx))
i = 0
while i < len(ctx.args):
arg = ctx.args[i]
if arg.startswith('--'):
param_name = arg[2:].replace('-', '_') # 转换参数名格式
i += 1
if i < len(ctx.args) and not ctx.args[i].startswith('--'):
# 参数有值
try:
# 尝试转换为适当的类型
if ctx.args[i].lower() == 'true':
extra_kwargs[param_name] = True
elif ctx.args[i].lower() == 'false':
extra_kwargs[param_name] = False
elif '.' in ctx.args[i]:
try:
extra_kwargs[param_name] = float(ctx.args[i])
except ValueError:
extra_kwargs[param_name] = ctx.args[i]
else:
try:
extra_kwargs[param_name] = int(ctx.args[i])
except ValueError:
extra_kwargs[param_name] = ctx.args[i]
except:
extra_kwargs[param_name] = ctx.args[i]
else:
# 布尔型标志参数
extra_kwargs[param_name] = True
i -= 1
i += 1
# 将解析出的参数合并到kwargs
kwargs.update(extra_kwargs)
if sglang_engine_enable: if sglang_engine_enable:
try: try:
......
import click
def arg_parse(ctx: 'click.Context') -> dict:
# 解析额外参数
extra_kwargs = {}
i = 0
while i < len(ctx.args):
arg = ctx.args[i]
if arg.startswith('--'):
param_name = arg[2:].replace('-', '_') # 转换参数名格式
i += 1
if i < len(ctx.args) and not ctx.args[i].startswith('--'):
# 参数有值
try:
# 尝试转换为适当的类型
if ctx.args[i].lower() == 'true':
extra_kwargs[param_name] = True
elif ctx.args[i].lower() == 'false':
extra_kwargs[param_name] = False
elif '.' in ctx.args[i]:
try:
extra_kwargs[param_name] = float(ctx.args[i])
except ValueError:
extra_kwargs[param_name] = ctx.args[i]
else:
try:
extra_kwargs[param_name] = int(ctx.args[i])
except ValueError:
extra_kwargs[param_name] = ctx.args[i]
except:
extra_kwargs[param_name] = ctx.args[i]
else:
# 布尔型标志参数
extra_kwargs[param_name] = True
i -= 1
i += 1
return extra_kwargs
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment