Unverified commit b8aab269, authored by Xiaomeng Zhao and committed by GitHub
Browse files

Merge pull request #1211 from myhloli/dev

refactor(magic_pdf): remove unused threading lock and model initialization code
Parents: ebfd6fd9, a1744b77
...@@ -28,8 +28,6 @@ from magic_pdf.model.sub_modules.model_utils import ( ...@@ -28,8 +28,6 @@ from magic_pdf.model.sub_modules.model_utils import (
from magic_pdf.model.sub_modules.ocr.paddleocr.ocr_utils import ( from magic_pdf.model.sub_modules.ocr.paddleocr.ocr_utils import (
get_adjusted_mfdetrec_res, get_ocr_result_list) get_adjusted_mfdetrec_res, get_ocr_result_list)
from threading import Lock
class CustomPEKModel: class CustomPEKModel:
...@@ -37,7 +35,6 @@ class CustomPEKModel: ...@@ -37,7 +35,6 @@ class CustomPEKModel:
""" """
======== model init ======== ======== model init ========
""" """
self._lock = Lock()
# 获取当前文件(即 pdf_extract_kit.py)的绝对路径 # 获取当前文件(即 pdf_extract_kit.py)的绝对路径
current_file_path = os.path.abspath(__file__) current_file_path = os.path.abspath(__file__)
# 获取当前文件所在的目录(model) # 获取当前文件所在的目录(model)
...@@ -153,12 +150,6 @@ class CustomPEKModel: ...@@ -153,12 +150,6 @@ class CustomPEKModel:
device=self.device, device=self.device,
) )
# 初始化ocr # 初始化ocr
# self.ocr_model = atom_model_manager.get_atom_model(
# atom_model_name=AtomicModel.OCR,
# ocr_show_log=show_log,
# det_db_box_thresh=0.3,
# lang=self.lang
# )
self.ocr_model = ocr_model_init( self.ocr_model = ocr_model_init(
show_log=show_log, show_log=show_log,
det_db_box_thresh=0.3, det_db_box_thresh=0.3,
...@@ -223,7 +214,7 @@ class CustomPEKModel: ...@@ -223,7 +214,7 @@ class CustomPEKModel:
# OCR recognition # OCR recognition
new_image = cv2.cvtColor(np.asarray(new_image), cv2.COLOR_RGB2BGR) new_image = cv2.cvtColor(np.asarray(new_image), cv2.COLOR_RGB2BGR)
# with self._lock:
if self.apply_ocr: if self.apply_ocr:
ocr_res = self.ocr_model.ocr(new_image, mfd_res=adjusted_mfdetrec_res)[0] ocr_res = self.ocr_model.ocr(new_image, mfd_res=adjusted_mfdetrec_res)[0]
else: else:
......
...@@ -31,7 +31,7 @@ try: ...@@ -31,7 +31,7 @@ try:
except ImportError: except ImportError:
pass pass
from magic_pdf.model.sub_modules.model_init import AtomModelSingleton, ocr_model_init from magic_pdf.model.sub_modules.model_init import ocr_model_init
from magic_pdf.para.para_split_v3 import para_split from magic_pdf.para.para_split_v3 import para_split
from magic_pdf.pre_proc.construct_page_dict import ocr_construct_page_component_v2 from magic_pdf.pre_proc.construct_page_dict import ocr_construct_page_component_v2
from magic_pdf.pre_proc.cut_image import ocr_cut_image_and_table from magic_pdf.pre_proc.cut_image import ocr_cut_image_and_table
...@@ -231,13 +231,7 @@ def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang ...@@ -231,13 +231,7 @@ def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang
if len(empty_spans) > 0: if len(empty_spans) > 0:
# 初始化ocr模型 # 初始化ocr模型
# atom_model_manager = AtomModelSingleton()
# ocr_model = atom_model_manager.get_atom_model(
# atom_model_name="ocr",
# ocr_show_log=False,
# det_db_box_thresh=0.3,
# lang=lang
# )
ocr_model = ocr_model_init( ocr_model = ocr_model_init(
show_log=False, show_log=False,
det_db_box_thresh=0.3, det_db_box_thresh=0.3,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.