Commit 78340ac2 authored by zhougaofeng
Browse files

Update doc_analyze_by_custom_model.py

parent 8b230796
...@@ -95,7 +95,7 @@ def custom_model_init(ocr: bool = False, show_log: bool = False): ...@@ -95,7 +95,7 @@ def custom_model_init(ocr: bool = False, show_log: bool = False):
else: else:
logger.error("Not allow model_name!") logger.error("Not allow model_name!")
exit(1) exit(1)
model_init_cost = time.time() - model_init_start model_init_cost = round(time.time() - model_init_start,2)
logger.info(f"model init cost: {model_init_cost}") logger.info(f"model init cost: {model_init_cost}")
else: else:
logger.error("use_inside_model is False, not allow to use inside model") logger.error("use_inside_model is False, not allow to use inside model")
...@@ -104,11 +104,12 @@ def custom_model_init(ocr: bool = False, show_log: bool = False): ...@@ -104,11 +104,12 @@ def custom_model_init(ocr: bool = False, show_log: bool = False):
return custom_model return custom_model
def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False, def doc_analyze(model,pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
start_page_id=0, end_page_id=None): start_page_id=0, end_page_id=None):
model_manager = ModelSingleton() # model_manager = ModelSingleton()
custom_model = model_manager.get_model(ocr, show_log) # custom_model = model_manager.get_model(ocr, show_log)
custom_model = model
images = load_images_from_pdf(pdf_bytes) images = load_images_from_pdf(pdf_bytes)
...@@ -133,7 +134,7 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False, ...@@ -133,7 +134,7 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
page_info = {"page_no": index, "height": page_height, "width": page_width} page_info = {"page_no": index, "height": page_height, "width": page_width}
page_dict = {"layout_dets": result, "page_info": page_info} page_dict = {"layout_dets": result, "page_info": page_info}
model_json.append(page_dict) model_json.append(page_dict)
doc_analyze_cost = time.time() - doc_analyze_start doc_analyze_cost = round(time.time() - doc_analyze_start,2)
logger.info(f"文件分析提取截图共耗时: {doc_analyze_cost}") logger.info(f"文件分析提取截图共耗时: {doc_analyze_cost}")
# logger.info(f'model_json:\n{model_json}') # logger.info(f'model_json:\n{model_json}')
return model_json return model_json
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment