Commit 07abba71 authored by 赵小蒙
Browse files

draw_bbox工具类逻辑更新

parent 63969109
from magic_pdf.libs.commons import fitz # PyMuPDF from magic_pdf.libs.commons import fitz # PyMuPDF
def draw_bbox(i, bbox_list, page, rgb_config): def draw_bbox_without_number(i, bbox_list, page, rgb_config):
new_rgb = [] new_rgb = []
for item in rgb_config: for item in rgb_config:
item = float(item) / 255 item = float(item) / 255
...@@ -12,6 +12,19 @@ def draw_bbox(i, bbox_list, page, rgb_config): ...@@ -12,6 +12,19 @@ def draw_bbox(i, bbox_list, page, rgb_config):
page.draw_rect(rect_coords, color=new_rgb, fill=None, width=0.5, overlay=True) # Draw the rectangle page.draw_rect(rect_coords, color=new_rgb, fill=None, width=0.5, overlay=True) # Draw the rectangle
def draw_bbox_with_number(i, bbox_list, page, rgb_config):
    """Outline every bbox of page ``i`` and label each with its 1-based index.

    Args:
        i: Index into ``bbox_list`` selecting the boxes for this page.
        bbox_list: Indexable collection whose ``i``-th entry is an iterable
            of ``(x0, y0, x1, y1)`` boxes.
        page: Object the rectangles and labels are drawn on via its
            ``draw_rect`` and ``insert_text`` methods.
        rgb_config: Colour channels on a 0-255 scale; each one is divided
            by 255 before being used as the drawing colour.
    """
    colour = [float(channel) / 255 for channel in rgb_config]
    for number, (x0, y0, x1, y1) in enumerate(bbox_list[i], start=1):
        outline = fitz.Rect(x0, y0, x1, y1)
        page.draw_rect(outline, color=colour, fill=None, width=0.5, overlay=True)
        # The label is anchored at the rectangle's (x0, y0) corner.
        page.insert_text((x0, y0), str(number), fontsize=10, color=colour)
def draw_layout_bbox(pdf_info_dict, input_path, out_path): def draw_layout_bbox(pdf_info_dict, input_path, out_path):
layout_bbox_list = [] layout_bbox_list = []
for page in pdf_info_dict.values(): for page in pdf_info_dict.values():
...@@ -22,13 +35,7 @@ def draw_layout_bbox(pdf_info_dict, input_path, out_path): ...@@ -22,13 +35,7 @@ def draw_layout_bbox(pdf_info_dict, input_path, out_path):
doc = fitz.open(input_path) doc = fitz.open(input_path)
for i, page in enumerate(doc): for i, page in enumerate(doc):
# 获取当前页面的数据 draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0])
page_data = layout_bbox_list[i]
for j, bbox in enumerate(page_data):
x0, y0, x1, y1 = bbox
rect_coords = fitz.Rect(x0, y0, x1, y1) # Define the rectangle
page.draw_rect(rect_coords, color=(1, 0, 0), fill=None, width=0.5, overlay=True) # Draw the rectangle
page.insert_text((x0, y0), str(j + 1), fontsize=10, color=(1, 0, 0)) # Insert the index at the top left corner of the rectangle
# Save the PDF # Save the PDF
doc.save(f"{out_path}/layout.pdf") doc.save(f"{out_path}/layout.pdf")
...@@ -56,11 +63,9 @@ def draw_text_bbox(pdf_info_dict, input_path, out_path): ...@@ -56,11 +63,9 @@ def draw_text_bbox(pdf_info_dict, input_path, out_path):
doc = fitz.open(input_path) doc = fitz.open(input_path)
for i, page in enumerate(doc): for i, page in enumerate(doc):
# 获取当前页面的数据 # 获取当前页面的数据
draw_bbox(i, text_list, page, [255, 0, 0]) draw_bbox_without_number(i, text_list, page, [255, 0, 0])
draw_bbox_without_number(i, inline_equation_list, page, [0, 255, 0])
draw_bbox(i, inline_equation_list, page, [0, 255, 0]) draw_bbox_without_number(i, displayed_equation_list, page, [0, 0, 255])
draw_bbox(i, displayed_equation_list, page, [0, 0, 255])
# Save the PDF # Save the PDF
doc.save(f"{out_path}/text.pdf") doc.save(f"{out_path}/text.pdf")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment