Commit c2ad4c75 authored by zhougaofeng
Browse files

Update pdf_extract_kit.py

parent 0135861f
......@@ -247,7 +247,7 @@ class CustomPEKModel:
logger.info('DocAnalysis init done!')
def __call__(self, image):
def __call__(self, image,index,end_page_id):
latex_filling_list = []
mf_image_list = []
......@@ -256,8 +256,8 @@ class CustomPEKModel:
layout_start = time.time()
layout_res = self.layout_model(image, ignore_catids=[])
layout_cost = round(time.time() - layout_start, 2)
logger.info(f"layout detection cost: {layout_cost}")
# logger.info(f"layout detection cost: {layout_cost}")
total_cost = layout_cost
if self.apply_formula:
# 公式检测
mfd_res = self.mfd_model.predict(image, imgsz=1888, conf=0.25, iou=0.45, verbose=True)[0]
......@@ -286,7 +286,7 @@ class CustomPEKModel:
for res, latex in zip(latex_filling_list, mfr_res):
res['latex'] = latex_rm_whitespace(latex)
mfr_cost = round(time.time() - mfr_start, 2)
logger.info(f"formula nums: {len(mf_image_list)}, mfr time: {mfr_cost}")
# logger.info(f"formula nums: {len(mf_image_list)}, mfr time: {mfr_cost}")
# Select regions for OCR / formula regions / table regions
ocr_res_list = []
......@@ -369,7 +369,11 @@ class CustomPEKModel:
})
ocr_cost = round(time.time() - ocr_start, 2)
logger.info(f"ocr cost: {ocr_cost}")
# logger.info(f"ocr cost: {ocr_cost}")
total_cost = total_cost + ocr_cost
index = index + 1
end_page_id = end_page_id + 1
logger.info(f'当前解析第【{index} / {end_page_id}】页, 耗时:{total_cost}')
#logger.info(f'是否表格识别:{self.apply_table}')
# 表格识别 table recognition
if self.apply_table:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment