"tests/git@developer.sourcefind.cn:OpenDAS/mmdetection3d.git" did not exist on "3870001ad33d09a3a691375c4bd2de7f56b29808"
Commit f52c6249 authored by 赵小蒙's avatar 赵小蒙
Browse files

更新路径输入和markdown输出逻辑

parent ca7059e5
import json
import os import os
import sys import sys
from pathlib import Path from pathlib import Path
...@@ -6,7 +7,7 @@ import click ...@@ -6,7 +7,7 @@ import click
from loguru import logger from loguru import logger
from magic_pdf.libs.commons import join_path, read_file from magic_pdf.libs.commons import join_path, read_file
from magic_pdf.dict2md.mkcontent import mk_mm_markdown from magic_pdf.dict2md.mkcontent import mk_mm_markdown, mk_universal_format
from magic_pdf.pipeline import parse_pdf_by_model from magic_pdf.pipeline import parse_pdf_by_model
...@@ -32,7 +33,8 @@ def main(s3_pdf_path: str, s3_pdf_profile: str, pdf_model_path: str, pdf_model_p ...@@ -32,7 +33,8 @@ def main(s3_pdf_path: str, s3_pdf_profile: str, pdf_model_path: str, pdf_model_p
os.makedirs(parent_dir) os.makedirs(parent_dir)
if not paras_dict.get('need_drop'): if not paras_dict.get('need_drop'):
markdown_content = mk_mm_markdown(paras_dict) content_list = mk_universal_format(paras_dict)
markdown_content = mk_mm_markdown(content_list)
else: else:
markdown_content = paras_dict['drop_reason'] markdown_content = paras_dict['drop_reason']
...@@ -70,8 +72,8 @@ def main_shell(pdf_file_path: str, save_path: str): ...@@ -70,8 +72,8 @@ def main_shell(pdf_file_path: str, save_path: str):
@click.command() @click.command()
@click.option("--pdf-dir", help="s3上pdf文件的路径") @click.option("--pdf-dir", help="本地pdf文件的路径")
@click.option("--model-dir", help="s3上pdf文件的路径") @click.option("--model-dir", help="本地模型文件的路径")
@click.option("--start-page-num", default=0, help="从第几页开始解析") @click.option("--start-page-num", default=0, help="从第几页开始解析")
def main_shell2(pdf_dir: str, model_dir: str,start_page_num: int): def main_shell2(pdf_dir: str, model_dir: str,start_page_num: int):
# 先扫描所有的pdf目录里的文件名字 # 先扫描所有的pdf目录里的文件名字
...@@ -86,8 +88,10 @@ def main_shell2(pdf_dir: str, model_dir: str,start_page_num: int): ...@@ -86,8 +88,10 @@ def main_shell2(pdf_dir: str, model_dir: str,start_page_num: int):
for pdf_file in pdf_file_names: for pdf_file in pdf_file_names:
pdf_file_path = os.path.join(pdf_dir, pdf_file) pdf_file_path = os.path.join(pdf_dir, pdf_file)
model_file_path = os.path.join(model_dir, pdf_file) model_file_path = os.path.join(model_dir, pdf_file).rstrip(".pdf") + ".json"
main(pdf_file_path, None, model_file_path, None, start_page_num) with open(model_file_path, "r") as json_file:
model_list = json.load(json_file)
main(pdf_file_path, None, model_list, None, start_page_num)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment