# test_cli.py
1
2
import os
import shutil
3
4
import tempfile

5
6
7
8
9
10
11
from click.testing import CliRunner

from magic_pdf.tools.cli import cli


def test_cli_pdf():
    """Run the CLI on a single PDF and verify the generated artifacts.

    Invokes ``cli`` with ``-p`` set to the ``cli_test_01.pdf`` asset and
    ``-o`` set to a fresh temporary directory, then checks that the command
    exits with code 0 and that every expected output file under
    ``<output>/cli_test_01/auto`` exists and exceeds a minimum size.

    The temporary output directory is removed even when a check fails, so
    failed runs do not accumulate leftovers under the unittest directory.
    """
    # setup
    unitest_dir = '/tmp/magic_pdf/unittest/tools'
    filename = 'cli_test_01'
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli,
            [
                '-p',
                'tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf',
                '-o',
                temp_output_dir,
            ],
        )

        # check
        assert result.exit_code == 0

        base_output_dir = os.path.join(temp_output_dir, filename, 'auto')

        # (file-name suffix, size in bytes the file must exceed)
        min_sizes = (
            ('.md', 7000),
            ('_middle.json', 200000),
            ('_model.json', 15000),
            ('_origin.pdf', 400000),
            ('_layout.pdf', 400000),
            ('_spans.pdf', 400000),
        )
        for suffix, min_size in min_sizes:
            path = os.path.join(base_output_dir, f'{filename}{suffix}')
            # os.stat raises FileNotFoundError if the output is missing,
            # which fails the test just like a too-small file does.
            size = os.stat(path).st_size
            assert size > min_size, f'{path}: {size} bytes, expected > {min_size}'

        # isdir() is False for a missing path, so it also covers existence.
        assert os.path.isdir(os.path.join(base_output_dir, 'images'))
        assert os.path.exists(
            os.path.join(base_output_dir, f'{filename}_content_list.json')
        )
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)


def test_cli_path():
    """Run the CLI on a directory of PDFs and verify each document's output.

    Invokes ``cli`` with ``-p`` set to the ``assets/cli/path`` directory and
    ``-o`` set to a fresh temporary directory, then checks that the command
    exits with code 0 and that, for both ``cli_test_01`` and ``cli_test_02``,
    every expected output file under ``<output>/<name>/auto`` exists and
    exceeds a minimum size.

    The temporary output directory is removed even when a check fails, so
    failed runs do not accumulate leftovers under the unittest directory.
    """
    # setup
    unitest_dir = '/tmp/magic_pdf/unittest/tools'
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli,
            ['-p', 'tests/unittest/test_tools/assets/cli/path', '-o', temp_output_dir],
        )

        # check
        assert result.exit_code == 0

        # Thresholds shared by both documents:
        # (file-name suffix, size in bytes the file must exceed)
        shared_min_sizes = (
            ('_middle.json', 200000),
            ('_model.json', 15000),
            ('_origin.pdf', 400000),
            ('_layout.pdf', 400000),
            ('_spans.pdf', 400000),
        )
        # Only the markdown threshold differs between the two documents.
        md_min_sizes = (
            ('cli_test_01', 7000),
            ('cli_test_02', 5000),
        )

        for filename, md_min_size in md_min_sizes:
            base_output_dir = os.path.join(temp_output_dir, filename, 'auto')

            for suffix, min_size in (('.md', md_min_size),) + shared_min_sizes:
                path = os.path.join(base_output_dir, f'{filename}{suffix}')
                # os.stat raises FileNotFoundError if the output is missing,
                # which fails the test just like a too-small file does.
                size = os.stat(path).st_size
                assert size > min_size, (
                    f'{path}: {size} bytes, expected > {min_size}'
                )

            # isdir() is False for a missing path, so it also covers existence.
            assert os.path.isdir(os.path.join(base_output_dir, 'images'))
            assert os.path.exists(
                os.path.join(base_output_dir, f'{filename}_content_list.json')
            )
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)