from magic_pdf.libs.MakeContentConfig import DropMode
from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.libs.json_compressor import JsonCompressor
from magic_pdf.pipe.AbsPipe import AbsPipe
from magic_pdf.user_api import parse_txt_pdf


class TXTPipe(AbsPipe):
    """Pipe implementation that parses a PDF through ``parse_txt_pdf``.

    Construction is delegated unchanged to ``AbsPipe``; this class only
    supplies the classify / parse / output steps.
    """

    def __init__(self, pdf_bytes: bytes, model_list: list, image_writer: AbsReaderWriter, is_debug: bool = False):
        """Forward all constructor arguments to ``AbsPipe.__init__``.

        Args:
            pdf_bytes: Raw bytes of the PDF document.
            model_list: Model output passed on to the parser
                (structure not visible here -- confirm against ``AbsPipe``).
            image_writer: Writer object handed to the parser.
            is_debug: Debug flag forwarded to the parser.
        """
        super().__init__(pdf_bytes, model_list, image_writer, is_debug)

    def pipe_classify(self):
        """Do nothing; this pipe has no classification step."""
        pass

    def pipe_parse(self):
        """Run ``parse_txt_pdf`` and store its result in ``self.pdf_mid_data``."""
        self.pdf_mid_data = parse_txt_pdf(
            self.pdf_bytes,
            self.model_list,
            self.image_writer,
            is_debug=self.is_debug,
        )

    def pipe_mk_uni_format(self, img_parent_path: str, drop_mode=DropMode.WHOLE_PDF):
        """Return the result of ``AbsPipe.mk_uni_format`` for this document.

        Args:
            img_parent_path: Passed through to ``AbsPipe.mk_uni_format``
                (presumably the parent path used for image references --
                confirm against ``AbsPipe``).
            drop_mode: Passed through to ``AbsPipe.mk_uni_format``;
                defaults to ``DropMode.WHOLE_PDF``.

        Returns:
            Whatever ``AbsPipe.mk_uni_format`` returns for the compressed
            intermediate data.
        """
        # NOTE(review): get_compress_pdf_mid_data is not defined in this
        # class; presumably inherited from AbsPipe and dependent on
        # pipe_parse() having populated self.pdf_mid_data -- confirm.
        return AbsPipe.mk_uni_format(self.get_compress_pdf_mid_data(), img_parent_path, drop_mode)

    def pipe_mk_markdown(self, img_parent_path: str, drop_mode=DropMode.WHOLE_PDF):
        """Return the result of ``AbsPipe.mk_markdown`` for this document.

        Args:
            img_parent_path: Passed through to ``AbsPipe.mk_markdown``
                (presumably the parent path used for image references --
                confirm against ``AbsPipe``).
            drop_mode: Passed through to ``AbsPipe.mk_markdown``;
                defaults to ``DropMode.WHOLE_PDF``.

        Returns:
            Whatever ``AbsPipe.mk_markdown`` returns for the compressed
            intermediate data.
        """
        return AbsPipe.mk_markdown(self.get_compress_pdf_mid_data(), img_parent_path, drop_mode)