Commit 2bdb5445 authored by JesseChen1031
Browse files

update api path and documents

parent 102fe277
...@@ -64,14 +64,13 @@ def init_writers( ...@@ -64,14 +64,13 @@ def init_writers(
Initialize writers based on path type Initialize writers based on path type
Args: Args:
pdf_path: PDF file path (local path or S3 path) file_path: file path (local path or S3 path)
pdf_file: Uploaded PDF file object file: Uploaded file object
output_path: Output directory path output_path: Output directory path
output_image_path: Image output directory path output_image_path: Image output directory path
Returns: Returns:
Tuple[writer, image_writer, pdf_bytes]: Returns initialized writer tuple and PDF Tuple[writer, image_writer, file_bytes]: Returns initialized writer tuple and file content
file content
""" """
file_extension:str = None file_extension:str = None
if file_path: if file_path:
...@@ -120,7 +119,8 @@ def process_file( ...@@ -120,7 +119,8 @@ def process_file(
Process PDF file content Process PDF file content
Args: Args:
pdf_bytes: Binary content of PDF file file_bytes: Binary content of file
file_extension: file extension
parse_method: Parse method ('ocr', 'txt', 'auto') parse_method: Parse method ('ocr', 'txt', 'auto')
image_writer: Image writer image_writer: Image writer
...@@ -170,9 +170,9 @@ def encode_image(image_path: str) -> str: ...@@ -170,9 +170,9 @@ def encode_image(image_path: str) -> str:
@app.post( @app.post(
"/pdf_parse", "/file_parse",
tags=["projects"], tags=["projects"],
summary="Parse PDF files (supports local files and S3)", summary="Parse files (supports local files and S3)",
) )
async def file_parse( async def file_parse(
file: UploadFile = None, file: UploadFile = None,
...@@ -190,10 +190,10 @@ async def file_parse( ...@@ -190,10 +190,10 @@ async def file_parse(
to the specified directory. to the specified directory.
Args: Args:
pdf_file: The PDF file to be parsed. Must not be specified together with file: The PDF file to be parsed. Must not be specified together with
`pdf_path` `file_path`
pdf_path: The path to the PDF file to be parsed. Must not be specified together file_path: The path to the PDF file to be parsed. Must not be specified together
with `pdf_file` with `file`
parse_method: Parsing method, can be auto, ocr, or txt. Default is auto. If parse_method: Parsing method, can be auto, ocr, or txt. Default is auto. If
results are not satisfactory, try ocr results are not satisfactory, try ocr
is_json_md_dump: Whether to write parsed data to .json and .md files. Default is_json_md_dump: Whether to write parsed data to .json and .md files. Default
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment