test_utils.py 1.67 KB
Newer Older
drunkpig's avatar
drunkpig committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import json
import os
import shutil
import tempfile

from magic_pdf.integrations.rag.type import CategoryType
from magic_pdf.integrations.rag.utils import (
    convert_middle_json_to_layout_elements, inference)


def test_convert_middle_json_to_layout_elements():
    """Check conversion of the middle.json asset into layout elements.

    Loads ``tests/test_integrations/test_rag/assets/middle.json`` (resolved
    against the current working directory), passes the parsed data together
    with a scratch output directory to
    ``convert_middle_json_to_layout_elements`` and asserts on the number of
    results, the layout detections of the first result and its element
    relations.
    """
    # setup
    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
    os.makedirs(unitest_dir, exist_ok=True)
    # mkdtemp creates the directory itself, so no extra makedirs is needed.
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # test
        with open('tests/test_integrations/test_rag/assets/middle.json',
                  encoding='utf-8') as f:
            json_data = json.load(f)
        res = convert_middle_json_to_layout_elements(
            json_data, temp_output_dir)

        assert len(res) == 1
        assert len(res[0].layout_dets) == 10
        assert res[0].layout_dets[0].anno_id == 0
        assert res[0].layout_dets[0].category_type == CategoryType.text
        assert len(res[0].extra.element_relation) == 3
    finally:
        # teardown: runs even when an assertion above fails, so a failing
        # run does not leave its scratch directory behind.
        shutil.rmtree(temp_output_dir)


def test_inference():
    """Run ``inference`` on a one-page PDF asset and check the result.

    Calls ``inference`` with ``one_page_with_table_image.pdf`` from the test
    asset directory (resolved against the current working directory), a
    scratch output directory and the ``'ocr'`` argument, then asserts on the
    number of results, the layout detections of the first result and its
    element relations.
    """
    asset_dir = 'tests/test_integrations/test_rag/assets'
    # setup
    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
    os.makedirs(unitest_dir, exist_ok=True)
    # mkdtemp creates the directory itself, so no extra makedirs is needed.
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # test
        res = inference(
            asset_dir + '/one_page_with_table_image.pdf',
            temp_output_dir,
            'ocr',
        )

        assert res is not None
        assert len(res) == 1
        assert len(res[0].layout_dets) == 10
        assert res[0].layout_dets[0].anno_id == 0
        assert res[0].layout_dets[0].category_type == CategoryType.text
        assert len(res[0].extra.element_relation) == 3
    finally:
        # teardown: runs even when inference raises or an assertion fails,
        # so a failing run does not leave its scratch directory behind.
        shutil.rmtree(temp_output_dir)