Commit ca8788fb authored by 赵小蒙's avatar 赵小蒙
Browse files

update cli

parent 6e8e81c9
......@@ -48,6 +48,7 @@ import csv
import copy
parse_pdf_methods = click.Choice(["ocr", "txt", "auto"])
use_inside_model = False
def prepare_env(pdf_file_name, method):
......@@ -70,17 +71,17 @@ def write_to_csv(csv_file_path, csv_data):
def do_parse(
pdf_file_name,
pdf_bytes,
model_list,
parse_method,
f_draw_span_bbox=True,
f_draw_layout_bbox=True,
f_dump_md=True,
f_dump_middle_json=True,
f_dump_model_json=True,
f_dump_orig_pdf=True,
f_dump_content_list=True,
pdf_file_name,
pdf_bytes,
model_list,
parse_method,
f_draw_span_bbox=True,
f_draw_layout_bbox=True,
f_dump_md=True,
f_dump_middle_json=True,
f_dump_model_json=True,
f_dump_orig_pdf=True,
f_dump_content_list=True,
):
orig_model_list = copy.deepcopy(model_list)
......@@ -96,14 +97,18 @@ def do_parse(
elif parse_method == "ocr":
pipe = OCRPipe(pdf_bytes, model_list, image_writer, is_debug=True)
else:
print("unknown parse method")
logger.error("unknown parse method")
sys.exit(1)
pipe.pipe_classify()
"""如果没有传入有效的模型数据,则使用内置paddle解析"""
"""如果没有传入有效的模型数据,则使用内置model解析"""
if len(model_list) == 0:
pipe.pipe_analyze()
if use_inside_model:
pipe.pipe_analyze()
else:
logger.error("need model list input")
sys.exit(1)
pipe.pipe_parse()
pdf_info = pipe.pdf_mid_data["pdf_info"]
......@@ -267,7 +272,11 @@ def local_json_command(local_json, method):
help="指定解析方法。txt: 文本型 pdf 解析方法, ocr: 光学识别解析 pdf, auto: 程序智能选择解析方法",
default="auto",
)
def pdf_command(pdf, model, method):
@click.option("--inside_model", type=click.BOOL, default=False, help="使用内置模型测试")
def pdf_command(pdf, model, method, inside_model):
global use_inside_model
use_inside_model = inside_model
def read_fn(path):
disk_rw = DiskReaderWriter(os.path.dirname(path))
return disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment