from loguru import logger

from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.libs.json_compressor import JsonCompressor
from magic_pdf.pipe.AbsPipe import AbsPipe
from magic_pdf.user_api import parse_txt_pdf


class TXTPipe(AbsPipe):
    """Pipe that analyzes and parses a PDF with OCR disabled.

    Model analysis is run through ``doc_analyze(..., ocr=False)`` and the
    parsing step is delegated to ``parse_txt_pdf``. The content-list and
    markdown builders are inherited from ``AbsPipe``; this class only adds
    a log line after each of them completes.
    """

    def __init__(self, pdf_bytes: bytes, model_list: list, image_writer: AbsReaderWriter, is_debug: bool = False,
                 start_page_id=0, end_page_id=None, lang=None):
        """Store the pipeline inputs by forwarding them to ``AbsPipe``.

        Args:
            pdf_bytes: Raw bytes of the PDF to process.
            model_list: Model output list; overwritten by ``pipe_analyze``.
            image_writer: Writer handed to ``parse_txt_pdf`` in ``pipe_parse``.
            is_debug: Forwarded to ``parse_txt_pdf`` as ``is_debug``.
            start_page_id: First page to process (defaults to 0).
            end_page_id: Last page to process; ``None`` is passed through
                unchanged to the downstream calls.
            lang: Optional language hint forwarded to ``doc_analyze``.
        """
        # NOTE(review): the merge conflict here was resolved in favor of the
        # HEAD side, which adds ``lang``. This assumes AbsPipe.__init__ accepts
        # ``lang`` as its seventh positional argument and exposes it as
        # ``self.lang`` -- confirm against AbsPipe.
        super().__init__(pdf_bytes, model_list, image_writer, is_debug, start_page_id, end_page_id, lang)

    def pipe_classify(self):
        """Do nothing; this pipe has no classification step."""
        pass

    def pipe_analyze(self):
        """Run model analysis with OCR disabled and store it in ``self.model_list``."""
        # NOTE(review): ``lang`` comes from the HEAD side of the resolved merge
        # conflict; assumes doc_analyze accepts a ``lang`` keyword -- confirm.
        self.model_list = doc_analyze(self.pdf_bytes, ocr=False,
                                      start_page_id=self.start_page_id, end_page_id=self.end_page_id,
                                      lang=self.lang)

    def pipe_parse(self):
        """Parse the PDF with ``parse_txt_pdf`` and store the result in ``self.pdf_mid_data``."""
        self.pdf_mid_data = parse_txt_pdf(self.pdf_bytes, self.model_list, self.image_writer, is_debug=self.is_debug,
                                          start_page_id=self.start_page_id, end_page_id=self.end_page_id)

    def pipe_mk_uni_format(self, img_parent_path: str, drop_mode=DropMode.WHOLE_PDF):
        """Build the content list via ``AbsPipe`` and log completion.

        Args:
            img_parent_path: Forwarded unchanged to the parent implementation.
            drop_mode: Forwarded unchanged; defaults to ``DropMode.WHOLE_PDF``.

        Returns:
            Whatever ``AbsPipe.pipe_mk_uni_format`` returns.
        """
        result = super().pipe_mk_uni_format(img_parent_path, drop_mode)
        logger.info("txt_pipe mk content list finished")
        return result

    def pipe_mk_markdown(self, img_parent_path: str, drop_mode=DropMode.WHOLE_PDF, md_make_mode=MakeMode.MM_MD):
        """Build the markdown output via ``AbsPipe`` and log completion.

        Args:
            img_parent_path: Forwarded unchanged to the parent implementation.
            drop_mode: Forwarded unchanged; defaults to ``DropMode.WHOLE_PDF``.
            md_make_mode: Forwarded unchanged; defaults to ``MakeMode.MM_MD``
                and is included in the log message.

        Returns:
            Whatever ``AbsPipe.pipe_mk_markdown`` returns.
        """
        result = super().pipe_mk_markdown(img_parent_path, drop_mode, md_make_mode)
        logger.info(f"txt_pipe mk {md_make_mode} finished")
        return result