test_common.py 1.46 KB
Newer Older
yyy's avatar
yyy committed
1
import tempfile
icecraft's avatar
icecraft committed
2
3
4
5
6
7
8
9
import os
import shutil

import pytest

from magic_pdf.tools.common import do_parse


yyy's avatar
yyy committed
10
@pytest.mark.parametrize("method", ["auto", "txt", "ocr"])
def test_common_do_parse(method):
    """Run ``do_parse`` on a sample PDF and sanity-check the files it writes.

    The test is parametrized over the parse ``method`` ("auto", "txt",
    "ocr"). For each method it expects the artifacts to land under
    ``<temp_output_dir>/<filename>/<method>/`` and checks that every
    expected file exceeds a minimum size and that an ``images`` directory
    exists.

    The temporary output directory is removed even when a check fails, so a
    failing run does not leave artifacts behind.
    """
    # setup
    unitest_dir = "/tmp/magic_pdf/unittest/tools"
    filename = "fake"
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    # Expected output file -> size in bytes that the file must exceed.
    # The thresholds are coarse lower bounds guarding against empty or
    # truncated output; they are not exact expected sizes.
    min_sizes = {
        "content_list.json": 5000,
        f"{filename}.md": 7000,
        "middle.json": 200000,
        "model.json": 15000,
        "origin.pdf": 500000,
        "layout.pdf": 500000,
        "spans.pdf": 500000,
    }

    try:
        # run
        with open("tests/test_tools/assets/common/cli_test_01.pdf", "rb") as f:
            bits = f.read()
        # NOTE(review): the empty list is the fourth positional argument of
        # do_parse; its meaning is not visible from this file — confirm
        # against magic_pdf.tools.common.do_parse.
        do_parse(temp_output_dir, filename, bits, [], method, f_dump_content_list=True)

        # check
        base_output_dir = os.path.join(temp_output_dir, filename, method)

        for name, min_size in min_sizes.items():
            size = os.stat(os.path.join(base_output_dir, name)).st_size
            assert size > min_size, (
                f"{name} is {size} bytes, expected more than {min_size}"
            )

        # The original called os.path.exists/isdir without asserting on the
        # result, so this check never ran. isdir() already implies existence.
        images_dir = os.path.join(base_output_dir, "images")
        assert os.path.isdir(images_dir), f"missing images directory: {images_dir}"
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)