Commit 302a6950 authored by xu rui
Browse files

feat: remove pipe_auto_mode

parent 3062217d
...@@ -65,31 +65,6 @@ class InferenceResultBase(ABC): ...@@ -65,31 +65,6 @@ class InferenceResultBase(ABC):
""" """
pass pass
@abstractmethod
def pipe_auto_mode(
    self,
    imageWriter: DataWriter,
    start_page_id=0,
    end_page_id=None,
    debug_mode=False,
    lang=None,
) -> PipeResult:
    """Post-process the model inference result, picking the mode automatically.

    Implementations are expected to work in two steps:
      1. classify the dataset type;
      2. depending on that classification, delegate to `pipe_txt_mode`
         or `pipe_ocr_mode`.

    Args:
        imageWriter (DataWriter): the image writer handle.
        start_page_id (int, optional): first page the caller wants
            processed. Defaults to 0.
        end_page_id (int, optional): last page the caller wants processed.
            Defaults to None, documented as meaning the last page index
            of the dataset.
        debug_mode (bool, optional): dump more log output when enabled.
            Defaults to False.
        lang (str, optional): Defaults to None.

    Returns:
        PipeResult: the result.
    """
@abstractmethod @abstractmethod
def pipe_txt_mode( def pipe_txt_mode(
self, self,
......
...@@ -71,40 +71,6 @@ class InferenceResult(InferenceResultBase): ...@@ -71,40 +71,6 @@ class InferenceResult(InferenceResultBase):
""" """
return proc(copy.deepcopy(self._infer_res), *args, **kwargs) return proc(copy.deepcopy(self._infer_res), *args, **kwargs)
def pipe_auto_mode(
    self,
    imageWriter: DataWriter,
    start_page_id=0,
    end_page_id=None,
    debug_mode=False,
    lang=None,
) -> PipeResult:
    """Post-process the model inference result, picking the mode automatically.

    The dataset bytes (`self._dataset.data_bits()`) are passed to `classify`.
    When the outcome equals `SupportedPdfParseMethod.TXT` the work is
    delegated to `pipe_txt_mode`; any other outcome goes to `pipe_ocr_mode`.
    All arguments are forwarded positionally and unchanged.

    Args:
        imageWriter (DataWriter): the image writer handle.
        start_page_id (int, optional): first page the caller wants
            processed. Defaults to 0.
        end_page_id (int, optional): last page the caller wants processed.
            Defaults to None, documented as meaning the last page index
            of the dataset.
        debug_mode (bool, optional): dump more log output when enabled.
            Defaults to False.
        lang (str, optional): Defaults to None.

    Returns:
        PipeResult: whatever the selected pipeline returns.
    """
    detected_method = classify(self._dataset.data_bits())

    # Select the pipeline first so the forwarded argument list is written once.
    if detected_method == SupportedPdfParseMethod.TXT:
        run_pipeline = self.pipe_txt_mode
    else:
        run_pipeline = self.pipe_ocr_mode

    return run_pipeline(imageWriter, start_page_id, end_page_id, debug_mode, lang)
def pipe_txt_mode( def pipe_txt_mode(
self, self,
imageWriter: DataWriter, imageWriter: DataWriter,
......
...@@ -170,6 +170,7 @@ def do_parse( ...@@ -170,6 +170,7 @@ def do_parse(
logger.error('need model list input') logger.error('need model list input')
exit(2) exit(2)
else: else:
infer_result = InferenceResult(model_list, ds) infer_result = InferenceResult(model_list, ds)
if parse_method == 'ocr': if parse_method == 'ocr':
pipe_result = infer_result.pipe_ocr_mode( pipe_result = infer_result.pipe_ocr_mode(
...@@ -180,9 +181,15 @@ def do_parse( ...@@ -180,9 +181,15 @@ def do_parse(
image_writer, debug_mode=True, lang=lang image_writer, debug_mode=True, lang=lang
) )
else: else:
# Auto mode: replaces the removed `infer_result.pipe_auto_mode(...)` call.
# The dataset classifies itself; text PDFs take the TXT pipeline and
# everything else must take the OCR pipeline, as the removed
# `pipe_auto_mode` did in its else branch.
if ds.classify() == SupportedPdfParseMethod.TXT:
    pipe_result = infer_result.pipe_txt_mode(
        image_writer, debug_mode=True, lang=lang
    )
else:
    # Fixed: this branch previously called pipe_txt_mode as well, so
    # non-text PDFs were never routed to OCR.
    pipe_result = infer_result.pipe_ocr_mode(
        image_writer, debug_mode=True, lang=lang
    )
if f_draw_model_bbox: if f_draw_model_bbox:
infer_result.draw_model( infer_result.draw_model(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment