"tests/vscode:/vscode.git/clone" did not exist on "ea311e6989ca8b36b765450f57f13901c03cb0af"
Commit 6bfc1711 authored by myhloli
Browse files

refactor(pdf_parse): comment out performance measurement and logging

- Comment out @measure_time decorator for txt_spans_extract_v2 and sort_lines_by_model functions
- Remove logger.info for page_process_time
- Comment out PerformanceStats.print_stats call
parent e516cf53
......@@ -218,7 +218,7 @@ def calculate_contrast(img, img_mode) -> float:
# logger.info(f"contrast: {contrast}")
return round(contrast, 2)
-@measure_time
+# @measure_time
def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang):
# cid用0xfffd表示,连字符拆开
# text_blocks_raw = pdf_page.get_text('rawdict', flags=fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_MEDIABOX_CLIP)['blocks']
......@@ -492,7 +492,7 @@ def insert_lines_into_block(block_bbox, line_height, page_w, page_h):
else:
return [[x0, y0, x1, y1]]
-@measure_time
+# @measure_time
def sort_lines_by_model(fix_blocks, page_w, page_h, line_height):
page_line_list = []
......@@ -962,12 +962,7 @@ def pdf_parse_union(
)
pdf_info_dict[f'page_{page_id}'] = page_info
-logger.info(
-f'page_process_time: {round(time.time() - start_time, 2)}'
-)
-PerformanceStats.print_stats()
+# PerformanceStats.print_stats()
"""分段"""
para_split(pdf_info_dict)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment