test_pdf2text_recogPara_BlockInnerParasProcessor.py 4.01 KB
Newer Older
赵小蒙's avatar
赵小蒙 committed
1
2
import unittest

赵小蒙's avatar
赵小蒙 committed
3
from post_proc.detect_para import BlockTerminationProcessor
赵小蒙's avatar
赵小蒙 committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107

# from ... pdf2text_recogPara import BlockInnerParasProcessor # another way to import

"""
Run one of the following commands from the code-clean directory to execute these tests:

    python -m test.test_para.test_pdf2text_recogPara_ClassName
    
    or
    
    pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
    
"""


class TestIsConsistentLines(unittest.TestCase):
    """Tests for two private helpers of ``BlockTerminationProcessor``.

    Covers ``_is_consistent_lines`` using single-span line fixtures with a
    ``consistent_direction`` of 0, 1, 2 and the out-of-range value 3, and,
    despite the class name, ``_is_possible_start_of_para`` using bbox-only
    line fixtures.
    """

    def setUp(self):
        """Build a fresh processor before each test."""
        self.obj = BlockTerminationProcessor()

    @staticmethod
    def _span_line(font_size):
        """Return a line dict holding one Arial span of ``font_size``."""
        return {"spans": [{"size": font_size, "font": "Arial"}]}

    def _consistency(self, prev_size, next_size, direction):
        """Call ``_is_consistent_lines`` for a size-12 current line.

        Args:
            prev_size: Font size of the previous line, or ``None`` to pass
                ``None`` as the previous line.
            next_size: Font size of the next line, or ``None`` to pass
                ``None`` as the next line.
            direction: Value forwarded as ``consistent_direction``.

        Returns:
            Whatever ``_is_consistent_lines`` returns.
        """
        before = None if prev_size is None else self._span_line(prev_size)
        after = None if next_size is None else self._span_line(next_size)
        return self.obj._is_consistent_lines(self._span_line(12), before, after, direction)

    def _start_of_para_verdict(self):
        """Call ``_is_possible_start_of_para`` with the shared bbox fixture.

        The call's result is unpacked as a 3-tuple and only its first item
        is returned.
        """
        boxes = [(0, 0, 100, 10), (0, 20, 100, 30), (0, 40, 100, 50)]
        current, previous, following = [{"bbox": box} for box in boxes]
        # Positional tail: X0=0, X1=100, avg_char_width=5, avg_font_size=10.
        verdict, _, _ = self.obj._is_possible_start_of_para(
            current, previous, following, 0, 100, 5, 10
        )
        return verdict

    # --- _is_consistent_lines: matching neighbours -------------------------

    def test_consistent_with_prev_line(self):
        # Direction 0, only a previous line, same size as the current line.
        self.assertTrue(self._consistency(12, None, 0))

    def test_consistent_with_next_line(self):
        # Direction 1, only a next line, same size as the current line.
        self.assertTrue(self._consistency(None, 12, 1))

    def test_consistent_with_both_lines(self):
        # Direction 2, both neighbours present and matching.
        self.assertTrue(self._consistency(12, 12, 2))

    # --- _is_consistent_lines: differing neighbours ------------------------

    def test_inconsistent_with_prev_line(self):
        # Direction 0, previous line uses size 14 instead of 12.
        self.assertFalse(self._consistency(14, None, 0))

    def test_inconsistent_with_next_line(self):
        # Direction 1, next line uses size 14 instead of 12.
        self.assertFalse(self._consistency(None, 14, 1))

    def test_inconsistent_with_both_lines(self):
        # Direction 2, both neighbours use size 14 instead of 12.
        self.assertFalse(self._consistency(14, 14, 2))

    def test_invalid_consistent_direction(self):
        # Direction 3 is outside the 0/1/2 values used by the other tests.
        self.assertFalse(self._consistency(None, None, 3))

    # --- _is_possible_start_of_para ----------------------------------------

    def test_possible_start_of_para(self):
        self.assertTrue(self._start_of_para_verdict())

    def test_not_possible_start_of_para(self):
        # NOTE(review): this uses exactly the same inputs and the same
        # assertTrue as test_possible_start_of_para, so no negative case is
        # covered despite the test name. Confirm the intended fixture and
        # expectation against _is_possible_start_of_para before changing it.
        self.assertTrue(self._start_of_para_verdict())


# Run the unittest runner when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()