Commit 3b7342b8 authored by 赵小蒙
Browse files

update cli output files

parent 9dc5033c
...@@ -100,18 +100,34 @@ def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer, ...@@ -100,18 +100,34 @@ def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer,
# [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']]) # [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']])
md_content = pipe.pipe_mk_markdown(image_dir, drop_mode=DropMode.NONE) md_content = pipe.pipe_mk_markdown(image_dir, drop_mode=DropMode.NONE)
'''写markdown'''
md_writer.write( md_writer.write(
content=md_content, path=f"{pdf_file_name}.md", mode=AbsReaderWriter.MODE_TXT content=md_content, path=f"{pdf_file_name}.md", mode=AbsReaderWriter.MODE_TXT
) )
'''写middle_json'''
md_writer.write( md_writer.write(
content=json_parse.dumps(pipe.pdf_mid_data, ensure_ascii=False, indent=4), content=json_parse.dumps(pipe.pdf_mid_data, ensure_ascii=False, indent=4),
path=f"{pdf_file_name}.json", path=f"{pdf_file_name}_middle.json",
mode=AbsReaderWriter.MODE_TXT, mode=AbsReaderWriter.MODE_TXT,
) )
'''写model_json'''
md_writer.write(
content=json_parse.dumps(pipe.model_list, ensure_ascii=False, indent=4),
path=f"{pdf_file_name}_model.json",
mode=AbsReaderWriter.MODE_TXT,
)
'''写源pdf'''
md_writer.write(
content=pdf_bytes,
path=f"{pdf_file_name}_origin.json",
mode=AbsReaderWriter.MODE_BIN,
)
content_list = pipe.pipe_mk_uni_format(image_dir, drop_mode=DropMode.NONE) content_list = pipe.pipe_mk_uni_format(image_dir, drop_mode=DropMode.NONE)
'''写content_list'''
md_writer.write( md_writer.write(
str(content_list), f"{pdf_file_name}.txt", AbsReaderWriter.MODE_TXT content=json_parse.dumps(content_list, ensure_ascii=False, indent=4),
path=f"{pdf_file_name}_content_list.json",
mode=AbsReaderWriter.MODE_TXT
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment