Unverified commit 809bf479, authored by Xiaomeng Zhao and committed by GitHub
Browse files

Merge pull request #1062 from opendatalab/dev

fix(table): add null check for OCR result in rapid table prediction 
parents 958168b3 241d4895
...@@ -163,7 +163,9 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False, ...@@ -163,7 +163,9 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
page_width = img_dict["width"] page_width = img_dict["width"]
page_height = img_dict["height"] page_height = img_dict["height"]
if start_page_id <= index <= end_page_id: if start_page_id <= index <= end_page_id:
page_start = time.time()
result = custom_model(img) result = custom_model(img)
logger.info(f'-----page_id : {index}, page total time: {round(time.time() - page_start, 2)}-----')
else: else:
result = [] result = []
page_info = {"page_no": index, "height": page_height, "width": page_width} page_info = {"page_no": index, "height": page_height, "width": page_width}
......
...@@ -170,7 +170,6 @@ class CustomPEKModel: ...@@ -170,7 +170,6 @@ class CustomPEKModel:
logger.info('DocAnalysis init done!') logger.info('DocAnalysis init done!')
def __call__(self, image): def __call__(self, image):
page_start = time.time()
# layout检测 # layout检测
layout_start = time.time() layout_start = time.time()
...@@ -272,6 +271,4 @@ class CustomPEKModel: ...@@ -272,6 +271,4 @@ class CustomPEKModel:
) )
logger.info(f'table time: {round(time.time() - table_start, 2)}') logger.info(f'table time: {round(time.time() - table_start, 2)}')
logger.info(f'-----page total time: {round(time.time() - page_start, 2)}-----')
return layout_res return layout_res
...@@ -10,5 +10,7 @@ class RapidTableModel(object): ...@@ -10,5 +10,7 @@ class RapidTableModel(object):
def predict(self, image): def predict(self, image):
ocr_result, _ = self.ocr_engine(np.asarray(image)) ocr_result, _ = self.ocr_engine(np.asarray(image))
if ocr_result is None:
return None, None, None
html_code, table_cell_bboxes, elapse = self.table_model(np.asarray(image), ocr_result) html_code, table_cell_bboxes, elapse = self.table_model(np.asarray(image), ocr_result)
return html_code, table_cell_bboxes, elapse return html_code, table_cell_bboxes, elapse
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment