Commit ea730ae2 authored by myhloli
Browse files

refactor(ocr): improve OCR score precision to three decimal places

- Update OCR score formatting in batch_analyze.py and pdf_parse_union_core_v2.py
- Change score rounding method to preserve three decimal places
- Enhance accuracy representation without significantly altering the score value
parent 90f0e737
......@@ -241,7 +241,7 @@ class BatchAnalyze:
for index, layout_res_item in enumerate(need_ocr_lists_by_lang[lang]):
ocr_text, ocr_score = ocr_res_list[index]
layout_res_item['text'] = ocr_text
- layout_res_item['score'] = float(round(ocr_score, 2))
+ layout_res_item['score'] = float(f"{ocr_score:.3f}")
total_processed += len(img_crop_list)
......
......@@ -997,7 +997,7 @@ def pdf_parse_union(
for index, span in enumerate(need_ocr_list):
ocr_text, ocr_score = ocr_res_list[index]
span['content'] = ocr_text
- span['score'] = float(round(ocr_score, 2))
+ span['score'] = float(f"{ocr_score:.3f}")
# rec_time = time.time() - rec_start
# logger.info(f'ocr-dynamic-rec time: {round(rec_time, 2)}, total images processed: {len(img_crop_list)}')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment