"model/vscode:/vscode.git/clone" did not exist on "fbd82ba5bb35c42a6b09f5bd50ff1aa0690b9626"
test_cli_dev.py 3.45 KB
Newer Older
icecraft's avatar
icecraft committed
1
2
import os
import shutil
3
4
import tempfile

icecraft's avatar
icecraft committed
5
6
7
8
9
10
11
from click.testing import CliRunner

from magic_pdf.tools import cli_dev


def test_cli_pdf():
    """Run the ``pdf`` sub-command of ``cli_dev.cli`` and check its outputs.

    The command is invoked through click's ``CliRunner`` with a sample PDF
    (``-p``), its model JSON (``-j``) and a fresh temporary output directory
    (``-o``).  The test then asserts that the command exited with code 0 and
    that every expected file under ``<output>/cli_test_01/auto`` is larger
    than a minimum size in bytes, and that an ``images`` directory exists.

    The temporary output directory is removed even when an assertion fails.
    """
    # setup
    unitest_dir = '/tmp/magic_pdf/unittest/tools'
    filename = 'cli_test_01'
    os.makedirs(unitest_dir, exist_ok=True)
    # mkdtemp creates the directory itself, so no extra makedirs is needed.
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli_dev.cli,
            [
                'pdf',
                '-p',
                'tests/test_tools/assets/cli/pdf/cli_test_01.pdf',
                '-j',
                'tests/test_tools/assets/cli_dev/cli_test_01.model.json',
                '-o',
                temp_output_dir,
            ],
        )

        # check
        # Surface the captured CLI output when the command fails.
        assert result.exit_code == 0, result.output

        base_output_dir = os.path.join(temp_output_dir, filename, 'auto')

        # Expected output file -> size (bytes) it must exceed.
        min_sizes = {
            f'{filename}_content_list.json': 5000,
            f'{filename}.md': 7000,
            f'{filename}_middle.json': 200000,
            f'{filename}_model.json': 15000,
            f'{filename}_origin.pdf': 500000,
            f'{filename}_layout.pdf': 500000,
            f'{filename}_spans.pdf': 500000,
        }
        for output_name, min_size in min_sizes.items():
            # os.stat raises FileNotFoundError if the file was not produced.
            actual_size = os.stat(
                os.path.join(base_output_dir, output_name)).st_size
            assert actual_size > min_size, (
                f'{output_name}: size {actual_size} is not > {min_size}')

        # isdir() is only true for an existing directory, so it also covers
        # the existence check.
        assert os.path.isdir(os.path.join(base_output_dir, 'images'))
    finally:
        # teardown: always clean up, even if a check above failed
        shutil.rmtree(temp_output_dir)


def test_cli_jsonl():
    """Run the ``jsonl`` sub-command of ``cli_dev.cli`` and check its outputs.

    ``cli_dev.read_s3_path`` is temporarily replaced by a function that reads
    the given path from the local filesystem.  The command is then invoked
    through click's ``CliRunner`` with a sample JSONL file (``-j``) and a
    fresh temporary output directory (``-o``).  The test asserts that the
    command exited with code 0 and that every expected file under
    ``<output>/cli_test_01/auto`` is larger than a minimum size in bytes, and
    that an ``images`` directory exists.

    The original ``cli_dev.read_s3_path`` is restored and the temporary
    output directory is removed even when an assertion fails.
    """
    # setup
    unitest_dir = '/tmp/magic_pdf/unittest/tools'
    filename = 'cli_test_01'
    os.makedirs(unitest_dir, exist_ok=True)
    # mkdtemp creates the directory itself, so no extra makedirs is needed.
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    def mock_read_s3_path(s3path):
        # Treat the "s3 path" as a local file path and return its raw bytes.
        with open(s3path, 'rb') as f:
            return f.read()

    # Remember the current attribute (if any) so the mock does not leak into
    # other tests running in the same process.
    missing = object()
    original_read_s3_path = getattr(cli_dev, 'read_s3_path', missing)
    cli_dev.read_s3_path = mock_read_s3_path  # mock

    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli_dev.cli,
            [
                'jsonl',
                '-j',
                'tests/test_tools/assets/cli_dev/cli_test_01.jsonl',
                '-o',
                temp_output_dir,
            ],
        )

        # check
        # Surface the captured CLI output when the command fails.
        assert result.exit_code == 0, result.output

        base_output_dir = os.path.join(temp_output_dir, filename, 'auto')

        # Expected output file -> size (bytes) it must exceed.
        min_sizes = {
            f'{filename}_content_list.json': 5000,
            f'{filename}.md': 7000,
            f'{filename}_middle.json': 200000,
            f'{filename}_model.json': 15000,
            f'{filename}_origin.pdf': 500000,
            f'{filename}_layout.pdf': 500000,
            f'{filename}_spans.pdf': 500000,
        }
        for output_name, min_size in min_sizes.items():
            # os.stat raises FileNotFoundError if the file was not produced.
            actual_size = os.stat(
                os.path.join(base_output_dir, output_name)).st_size
            assert actual_size > min_size, (
                f'{output_name}: size {actual_size} is not > {min_size}')

        # isdir() is only true for an existing directory, so it also covers
        # the existence check.
        assert os.path.isdir(os.path.join(base_output_dir, 'images'))
    finally:
        # teardown: undo the mock and always clean up the output directory
        if original_read_s3_path is missing:
            del cli_dev.read_s3_path
        else:
            cli_dev.read_s3_path = original_read_s3_path
        shutil.rmtree(temp_output_dir)