# test_cli.py
import tempfile
import os
import shutil
from click.testing import CliRunner

from magic_pdf.tools.cli import cli


def test_cli_pdf():
    """Invoke the CLI on a single sample PDF and check the files it writes.

    The CLI is called with ``-p`` set to the sample PDF and ``-o`` set to a
    fresh temporary directory.  The test then checks, under
    ``<output>/cli_test_01/auto``, that each expected artifact exceeds a
    minimum size, that an ``images`` directory exists, and that no
    ``content_list.json`` was produced.

    The temporary directory is removed even when an assertion fails, so a
    failing run does not leave output behind.
    """
    # setup
    unitest_dir = "/tmp/magic_pdf/unittest/tools"
    filename = "cli_test_01"
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli,
            [
                "-p",
                "tests/test_tools/assets/cli/pdf/cli_test_01.pdf",
                "-o",
                temp_output_dir,
            ],
        )

        # check
        # Surface the captured CLI output when the command fails.
        assert result.exit_code == 0, result.output

        base_output_dir = os.path.join(temp_output_dir, filename, "auto")

        # Strict lower bounds (in bytes) for every generated artifact.
        min_sizes = {
            f"{filename}.md": 7000,
            "middle.json": 200000,
            "model.json": 15000,
            "origin.pdf": 500000,
            "layout.pdf": 500000,
            "spans.pdf": 500000,
        }
        for name, min_size in min_sizes.items():
            # getsize raises OSError if the file is missing, failing the test.
            size = os.path.getsize(os.path.join(base_output_dir, name))
            assert size > min_size, f"{name}: {size} <= {min_size}"

        assert os.path.isdir(os.path.join(base_output_dir, "images"))
        assert not os.path.exists(
            os.path.join(base_output_dir, "content_list.json")
        )
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)


def test_cli_path():
    """Invoke the CLI on a directory of sample PDFs and check each output.

    The CLI is called with ``-p`` set to the sample directory and ``-o`` set
    to a fresh temporary directory.  For both ``cli_test_01`` and
    ``cli_test_02`` the test checks, under ``<output>/<name>/auto``, that
    each expected artifact exceeds a minimum size, that an ``images``
    directory exists, and that no ``content_list.json`` was produced.

    The temporary directory is removed even when an assertion fails, so a
    failing run does not leave output behind.
    """
    # setup
    unitest_dir = "/tmp/magic_pdf/unittest/tools"
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli, ["-p", "tests/test_tools/assets/cli/path", "-o", temp_output_dir]
        )

        # check
        # Surface the captured CLI output when the command fails.
        assert result.exit_code == 0, result.output

        # Only the markdown size threshold differs between the two documents.
        for filename, min_md_size in (("cli_test_01", 7000), ("cli_test_02", 5000)):
            base_output_dir = os.path.join(temp_output_dir, filename, "auto")

            # Strict lower bounds (in bytes) for every generated artifact.
            min_sizes = {
                f"{filename}.md": min_md_size,
                "middle.json": 200000,
                "model.json": 15000,
                "origin.pdf": 500000,
                "layout.pdf": 500000,
                "spans.pdf": 500000,
            }
            for name, min_size in min_sizes.items():
                # getsize raises OSError if the file is missing, failing the test.
                size = os.path.getsize(os.path.join(base_output_dir, name))
                assert size > min_size, (
                    f"{filename}/{name}: {size} <= {min_size}"
                )

            assert os.path.isdir(os.path.join(base_output_dir, "images"))
            assert not os.path.exists(
                os.path.join(base_output_dir, "content_list.json")
            )
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)