draw_bbox.py 1.97 KB
Newer Older
1
from magic_pdf.libs.commons import fitz  # PyMuPDF
赵小蒙's avatar
赵小蒙 committed
2
3

# Path of the PDF file to annotate.
pdf_path = r"D:\project\20231108code-clean\magic_pdf\tmp\unittest\download-pdfs\ocr_1.json.pdf"

doc = fitz.open(pdf_path)  # Open the PDF

# Bounding boxes to draw: one inner list per page (indexed in page order),
# each entry a 4-tuple that is unpacked as (x0, y0, x1, y1) when drawn.
data = [
    [(294.7569528415961, 776.8430953398889, 300.8827085852479, 786.922616502779), (460.1523579201934, 776.8430953398889, 509.51874244256345, 787.2825994014537)],
    [(294.03627569528413, 779.7229585292861, 301.24304715840384, 788.3625480974777), (85.76058041112454, 781.882855921334, 156.74727932285367, 789.8024796921762)],
    [(293.6759371221282, 779.7229585292861, 301.60338573155985, 788.7225309961523), (459.43168077388145, 779.7229585292861, 508.7980652962515, 789.8024796921762)],
    [(295.8379685610641, 780.0829414279607, 301.24304715840384, 788.0025651988029), (85.76058041112454, 781.5228730226593, 156.74727932285367, 790.1624625908509)],
    [(294.03627569528413, 779.7229585292861, 301.60338573155985, 789.0825138948269), (459.79201934703747, 779.7229585292861, 508.4377267230955, 789.4424967935015)],
    [(86.4812575574365, 781.882855921334, 156.0266021765417, 789.8024796921762)],
    [(294.39661426844015, 779.7229585292861, 301.24304715840384, 788.3625480974777), (459.43168077388145, 779.7229585292861, 508.7980652962515, 789.4424967935015)],
    [(294.03627569528413, 779.7229585292861, 301.24304715840384, 788.3625480974777), (85.76058041112454, 781.5228730226593, 156.74727932285367, 789.8024796921762)],
    [(294.39661426844015, 779.7229585292861, 300.8827085852479, 788.3625480974777)],
]
赵小蒙's avatar
赵小蒙 committed
9
10
11
12
13
14

# Draw every bounding box onto its page.
# zip() pairs each page with its box list and stops at the shorter of the
# two, so a PDF with more pages than `data` has entries no longer raises
# IndexError; the extra pages are simply left without boxes.
for page, page_data in zip(doc, data):
    for bbox in page_data:
        # Each entry must be exactly four coordinates; unpacking enforces it.
        x0, y0, x1, y1 = bbox
        rect_coords = fitz.Rect(x0, y0, x1, y1)  # Define the rectangle
        # Red outline, no fill, drawn on top of the existing page content.
        page.draw_rect(rect_coords, color=(1, 0, 0), fill=None, width=1.5, overlay=True)

# Save the annotated PDF under a new name, leaving the input file untouched.
doc.save(r"D:\project\20231108code-clean\magic_pdf\tmp\unittest\download-pdfs\ocr_1.json_new.pdf")