@app.post("/pdf_parse",tags=["projects"],summary="Parse PDF file")
@app.post('/pdf_parse',tags=['projects'],summary='Parse PDF file')
asyncdefpdf_parse_main(
asyncdefpdf_parse_main(
pdf_file:UploadFile=File(...),
pdf_file:UploadFile=File(...),
parse_method:str='auto',
parse_method:str='auto',
model_json_path:str=None,
model_json_path:str=None,
is_json_md_dump:bool=True,
is_json_md_dump:bool=True,
output_dir:str="output"
output_dir:str='output',
):
):
"""
"""Execute the process of converting PDF to JSON and MD, outputting MD and
Execute the process of converting PDF to JSON and MD, outputting MD and JSON files to the specified directory
JSON files to the specified directory.
:param pdf_file: The PDF file to be parsed
:param pdf_file: The PDF file to be parsed
:param parse_method: Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr
:param parse_method: Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr
:param model_json_path: Path to existing model data file. If empty, use built-in model. PDF and model_json must correspond
:param model_json_path: Path to existing model data file. If empty, use built-in model. PDF and model_json must correspond
:param is_json_md_dump: Whether to write parsed data to .json and .md files. Default is True. Different stages of data will be written to different .json files (3 in total), md content will be saved to .md file
:param is_json_md_dump: Whether to write parsed data to .json and .md files. Default is True. Different stages of data will be written to different .json files (3 in total), md content will be saved to .md file # noqa E501
:param output_dir: Output directory for results. A folder named after the PDF file will be created to store all results
:param output_dir: Output directory for results. A folder named after the PDF file will be created to store all results
"""
"""
try:
try:
# Create a temporary file to store the uploaded PDF
# Create a temporary file to store the uploaded PDF