"vscode:/vscode.git/clone" did not exist on "0fb5268496792ec1da2b64f848a9a05ad0689ec3"
test_common.py 1.53 KB
Newer Older
icecraft's avatar
icecraft committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import tempfile
import os
import shutil

import pytest

from magic_pdf.tools.common import do_parse


@pytest.mark.parametrize("method", ["auto", "txt", "ocr"])
def test_common_do_parse(method):
    """Run ``do_parse`` on the sample PDF and sanity-check what it writes.

    The test is executed once per ``method`` value ("auto", "txt", "ocr").
    It checks that every expected artifact exists under
    ``<temp_output_dir>/<filename>/<method>`` and is larger than a minimum
    size, and that an ``images`` directory was created alongside them.

    Args:
        method: Parse method forwarded to ``do_parse``; also the name of the
            sub-directory in which the outputs are looked up.

    Raises:
        FileNotFoundError: If the sample PDF or an expected output file is
            missing.
        AssertionError: If an output file is too small or ``images`` is not
            a directory.
    """
    # setup
    unitest_dir = "/tmp/magic_pdf/unittest/tools"
    filename = "fake"
    os.makedirs(unitest_dir, exist_ok=True)

    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    # try/finally so the temporary directory is removed even when do_parse
    # raises or one of the checks fails.
    try:
        # run
        # NOTE: the path is relative, so the test must be started from the
        # repository root.
        with open("tests/test_tools/assets/common/cli_test_01.pdf", "rb") as f:
            bits = f.read()

        # NOTE(review): the positional `[]` and `False` are presumably the
        # model list and a debug flag -- confirm against do_parse's signature.
        do_parse(temp_output_dir,
                 filename,
                 bits, [],
                 method,
                 False,
                 f_dump_content_list=True)

        # check
        base_output_dir = os.path.join(temp_output_dir, filename, method)

        # Output file name -> size in bytes that the file must exceed.
        min_sizes = {
            "content_list.json": 5000,
            f"{filename}.md": 7000,
            "middle.json": 200000,
            "model.json": 15000,
            "origin.pdf": 500000,
            "layout.pdf": 500000,
            "spans.pdf": 500000,
        }
        for name, min_size in min_sizes.items():
            # getsize raises FileNotFoundError when the file is missing,
            # which fails the test just like the size assertion would.
            size = os.path.getsize(os.path.join(base_output_dir, name))
            assert size > min_size, (
                f"{name}: expected more than {min_size} bytes, got {size}")

        # isdir() is False for a missing path, so it covers existence too.
        images_dir = os.path.join(base_output_dir, "images")
        assert os.path.isdir(images_dir), f"{images_dir} is not a directory"
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)