Unverified Commit 862891e2 authored by Wang Yubo's avatar Wang Yubo Committed by GitHub
Browse files

Update app.py: Fix parameter parsing in /file_parse endpoint

I have updated the `/file_parse` endpoint in `app.py` to correctly handle boolean and string parameters when they are sent via `multipart/form-data` requests (commonly used for file uploads). Previously, these parameters were not read from the request body because FastAPI treats plain `str`/`bool` function parameters as query parameters by default.

### Changes Made:
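- Imported `Form` from `fastapi` and wrapped the default value of every non-file parameter in `Form(<default>)`: `file_path`, `parse_method`, `is_json_md_dump`, `output_dir`, and the return flags `return_layout`, `return_info`, `return_content_list`, and `return_images`.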
- This ensures that FastAPI correctly reads these fields from form-data, allowing clients to send both files and structured configuration options in the same request.

### Why This Change Was Needed:
- When using `requests.post(..., data=data, files=files)`, the `data` dictionary is sent as form-encoded data.
- Without explicitly declaring these fields with `Form(...)`, FastAPI looks for them in the query string and ignores the form fields, so the default values are always used (e.g., `False` for boolean flags).
- This change allows the API to accurately reflect the client's intent and enables features like `return_layout`, `return_images`, etc., to work as expected.

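This update improves compatibility with HTTP clients that rely on standard form-based file upload mechanisms while keeping the parameter names and default values unchanged. Note that clients which previously passed these options in the query string must now send them as form fields.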
parent 2aaf2310
...@@ -21,6 +21,7 @@ from magic_pdf.libs.config_reader import get_bucket_name, get_s3_config ...@@ -21,6 +21,7 @@ from magic_pdf.libs.config_reader import get_bucket_name, get_s3_config
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.operators.models import InferenceResult from magic_pdf.operators.models import InferenceResult
from magic_pdf.operators.pipes import PipeResult from magic_pdf.operators.pipes import PipeResult
from fastapi import Form
model_config.__use_inside_model__ = True model_config.__use_inside_model__ = True
...@@ -102,6 +103,7 @@ def init_writers( ...@@ -102,6 +103,7 @@ def init_writers(
# 处理上传的文件 # 处理上传的文件
file_bytes = file.file.read() file_bytes = file.file.read()
file_extension = os.path.splitext(file.filename)[1] file_extension = os.path.splitext(file.filename)[1]
writer = FileBasedDataWriter(output_path) writer = FileBasedDataWriter(output_path)
image_writer = FileBasedDataWriter(output_image_path) image_writer = FileBasedDataWriter(output_image_path)
os.makedirs(output_image_path, exist_ok=True) os.makedirs(output_image_path, exist_ok=True)
...@@ -176,14 +178,14 @@ def encode_image(image_path: str) -> str: ...@@ -176,14 +178,14 @@ def encode_image(image_path: str) -> str:
) )
async def file_parse( async def file_parse(
file: UploadFile = None, file: UploadFile = None,
file_path: str = None, file_path: str = Form(None),
parse_method: str = "auto", parse_method: str = Form("auto"),
is_json_md_dump: bool = False, is_json_md_dump: bool = Form(False),
output_dir: str = "output", output_dir: str = Form("output"),
return_layout: bool = False, return_layout: bool = Form(False),
return_info: bool = False, return_info: bool = Form(False),
return_content_list: bool = False, return_content_list: bool = Form(False),
return_images: bool = False, return_images: bool = Form(False),
): ):
""" """
Execute the process of converting PDF to JSON and MD, outputting MD and JSON files Execute the process of converting PDF to JSON and MD, outputting MD and JSON files
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment