Commit 6b296ee2 authored by myhloli
Browse files

fix(pdf_parse): improve OCR result handling

- Add a null (None) check for OCR results, ahead of the existing empty-list check, to prevent errors when no result is returned
- Enhance robustness of OCR text processing in the magic-pdf project
parent f1e2f084
......@@ -222,7 +222,7 @@ def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang
ocr_res = ocr_model.ocr(span_img, det=False)
# logger.info(f"ocr_res: {ocr_res}")
# logger.info(f"empty_span: {span}")
-if len(ocr_res) > 0:
+if ocr_res and len(ocr_res) > 0:
if len(ocr_res[0]) > 0:
ocr_text, ocr_score = ocr_res[0][0]
if ocr_score > 0.5 and len(ocr_text) > 0:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment