test_cli.py 3.68 KB
Newer Older
icecraft's avatar
icecraft committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import tempfile
import os
import shutil
from click.testing import CliRunner

from magic_pdf.tools.cli import cli


def test_cli_pdf():
    """Run the CLI on a single PDF file and check the artifacts it writes.

    The command is invoked in-process through click's ``CliRunner`` with
    ``-p`` pointing at ``tests/test_tools/assets/cli/pdf/cli_test_01.pdf`` and
    ``-o`` pointing at a fresh temporary directory.  The test then verifies
    that the expected files exist under ``<output>/cli_test_01/auto`` and that
    each one is larger than a minimum size.

    NOTE(review): the input path is relative, so the test presumably has to
    be started from the repository root -- confirm against the CI setup.
    """
    # setup
    unitest_dir = "/tmp/magic_pdf/unittest/tools"
    filename = "cli_test_01"
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    # Everything after the directory is created runs under try/finally so the
    # temporary output is removed even when an assertion fails.
    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli,
            [
                "-p",
                "tests/test_tools/assets/cli/pdf/cli_test_01.pdf",
                "-o",
                temp_output_dir,
            ],
        )

        # check
        assert result.exit_code == 0, result.output

        base_output_dir = os.path.join(temp_output_dir, filename, "auto")

        # Each output file must be strictly larger than this many bytes.
        min_sizes = {
            f"{filename}.md": 7000,
            "middle.json": 200000,
            "model.json": 15000,
            "origin.pdf": 500000,
            "layout.pdf": 500000,
            "spans.pdf": 500000,
        }
        for name, min_size in min_sizes.items():
            # os.stat raises FileNotFoundError when the file is missing,
            # which fails the test just like a too-small file does.
            size = os.stat(os.path.join(base_output_dir, name)).st_size
            assert size > min_size, f"{name}: {size} bytes, expected > {min_size}"

        # isdir() is False for a missing path, so it also covers existence.
        assert os.path.isdir(os.path.join(base_output_dir, "images"))
        # This invocation is expected not to produce a content list.
        assert not os.path.exists(
            os.path.join(base_output_dir, "content_list.json")
        )
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)


def test_cli_path():
    """Run the CLI on a directory of PDFs and check every document's output.

    The command is invoked in-process through click's ``CliRunner`` with
    ``-p`` pointing at the directory ``tests/test_tools/assets/cli/path`` and
    ``-o`` pointing at a fresh temporary directory.  For both expected
    documents (``cli_test_01`` and ``cli_test_02``) the test verifies that
    the files under ``<output>/<document>/auto`` exist and that each one is
    larger than a minimum size.

    NOTE(review): the input path is relative, so the test presumably has to
    be started from the repository root -- confirm against the CI setup.
    """
    # setup
    unitest_dir = "/tmp/magic_pdf/unittest/tools"
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    # Everything after the directory is created runs under try/finally so the
    # temporary output is removed even when an assertion fails.
    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli, ["-p", "tests/test_tools/assets/cli/path", "-o", temp_output_dir]
        )

        # check
        assert result.exit_code == 0, result.output

        # Minimum sizes (bytes, exclusive) shared by both documents.
        common_min_sizes = {
            "middle.json": 200000,
            "model.json": 15000,
            "origin.pdf": 500000,
            "layout.pdf": 500000,
            "spans.pdf": 500000,
        }
        # Only the markdown threshold differs between the two documents.
        md_min_sizes = {"cli_test_01": 7000, "cli_test_02": 5000}

        for filename, md_min_size in md_min_sizes.items():
            base_output_dir = os.path.join(temp_output_dir, filename, "auto")

            min_sizes = {f"{filename}.md": md_min_size, **common_min_sizes}
            for name, min_size in min_sizes.items():
                # os.stat raises FileNotFoundError when the file is missing,
                # which fails the test just like a too-small file does.
                size = os.stat(os.path.join(base_output_dir, name)).st_size
                assert size > min_size, (
                    f"{filename}/{name}: {size} bytes, expected > {min_size}"
                )

            # isdir() is False for a missing path, so it also covers existence.
            assert os.path.isdir(os.path.join(base_output_dir, "images"))
            # This invocation is expected not to produce a content list.
            assert not os.path.exists(
                os.path.join(base_output_dir, "content_list.json")
            )
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)