test_bench_gpu.py 2.59 KB
Newer Older
1

2
import json
quyuan's avatar
quyuan committed
3
import os
4
5
import shutil

quyuan's avatar
quyuan committed
6
7
from conf import conf
from lib import calculate_score
8
9
10
11

# Directory settings are read once from the project test configuration.
_settings = conf.conf
# Where magic-pdf output is looked up (and wiped before each run).
pdf_res_path = _settings['pdf_res_path']
# Not referenced by the code visible in this module.
code_path = _settings['code_path']
# Holds the input 'pdf' folder, the baseline 'result.json' and the outputs.
pdf_dev_path = _settings['pdf_dev_path']
quyuan's avatar
quyuan committed
12
class TestCliCuda:
    """Score regression test for the magic-pdf CLI (the "cli cuda" bench)."""

    def test_pdf_sdk_cuda(self):
        """Re-run the PDF conversion and compare scores with the last baseline.

        Steps:
            1. Remove previous magic-pdf output and convert all PDFs again.
            2. Read the most recent score record (last non-blank JSON line)
               from ``<pdf_dev_path>/result.json``.
            3. Run ``pre_clean.py`` on the download directory and compute the
               current scores.
            4. Write the current scores to ``<pdf_dev_path>/ci/result.json``.
            5. Assert that none of the three averaged metrics went below the
               baseline value.

        Raises:
            AssertionError: if the baseline file holds no score record, or if
                a metric is lower than its baseline value.
        """
        clean_magicpdf(pdf_res_path)
        pdf_to_markdown()

        # The baseline must be read before get_score() runs, so keep this
        # ahead of the scoring step.
        baseline_path = os.path.join(pdf_dev_path, 'result.json')
        with open(baseline_path, 'r', encoding='utf-8') as fr:
            # Skip blank lines so a trailing newline does not hide the record.
            records = [line.strip() for line in fr if line.strip()]
        assert records, f'no baseline scores found in {baseline_path}'
        last_score = json.loads(records[-1])
        last_simscore = last_score['average_sim_score']
        last_editdistance = last_score['average_edit_distance']
        last_bleu = last_score['average_bleu_score']

        # The script path is relative, so this relies on the working directory.
        os.system(f'python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}')
        now_score = get_score()
        print('now_score:', now_score)

        ci_dir = os.path.join(pdf_dev_path, 'ci')
        os.makedirs(ci_dir, exist_ok=True)
        # The context manager flushes and closes the file before asserting.
        with open(os.path.join(ci_dir, 'result.json'), 'w+', encoding='utf-8') as fw:
            fw.write(json.dumps(now_score) + '\n')

        now_simscore = now_score['average_sim_score']
        now_editdistance = now_score['average_edit_distance']
        now_bleu = now_score['average_bleu_score']

        assert last_simscore <= now_simscore
        # NOTE(review): edit distance is required to be non-decreasing, same
        # as the other metrics - confirm this matches how calculate_score
        # reports it.
        assert last_editdistance <= now_editdistance
        assert last_bleu <= now_bleu

def pdf_to_markdown():
    """Convert every PDF in ``<pdf_dev_path>/pdf`` and collect the Markdown.

    For each ``*.pdf`` file, ``magic-pdf pdf-command --pdf <file>
    --inside_model true`` is executed. The Markdown file expected at
    ``<pdf_res_path>/<name>/auto/<name>.md`` is then copied to
    ``<pdf_dev_path>/mineru/<name>.md``.

    Raises:
        FileNotFoundError: if the ``pdf`` directory, the ``magic-pdf``
            executable, or an expected Markdown output file is missing.
    """
    # Imported locally so this function does not depend on the module header.
    import subprocess

    pdf_dir = os.path.join(pdf_dev_path, 'pdf')
    # splitext removes only the final extension, so 'a.b.pdf' stays 'a.b'
    # instead of being truncated to 'a'.
    demo_names = [
        os.path.splitext(pdf_file)[0]
        for pdf_file in os.listdir(pdf_dir)
        if pdf_file.endswith('.pdf')
    ]
    for demo_name in demo_names:
        pdf_path = os.path.join(pdf_dir, f'{demo_name}.pdf')
        # An argument list keeps paths with spaces or shell metacharacters
        # intact. The exit status is ignored, as before.
        subprocess.run(
            ['magic-pdf', 'pdf-command', '--pdf', pdf_path, '--inside_model', 'true'],
            check=False,
        )
        dir_path = os.path.join(pdf_dev_path, 'mineru')
        os.makedirs(dir_path, exist_ok=True)
        res_path = os.path.join(dir_path, f'{demo_name}.md')
        # Assumes magic-pdf writes <name>/auto/<name>.md under pdf_res_path
        # - TODO confirm against the CLI's output layout.
        src_path = os.path.join(pdf_res_path, demo_name, 'auto', f'{demo_name}.md')
        shutil.copy(src_path, res_path)
quyuan's avatar
quyuan committed
56
57
58
59



def get_score():
    """Compute and return the summary scores for the 'mineru' output.

    A ``calculate_score.Scoring`` object is created for
    ``<pdf_dev_path>/result.json``, ``calculate_similarity_total`` is run for
    the ``'mineru'`` tool on ``pdf_dev_path``, and the value of
    ``summary_scores()`` is returned.
    """
    result_file = os.path.join(pdf_dev_path, 'result.json')
    scorer = calculate_score.Scoring(result_file)
    scorer.calculate_similarity_total('mineru', pdf_dev_path)
    return scorer.summary_scores()


def clean_magicpdf(pdf_res_path):
    """Remove the magic-pdf result path, ignoring a path that does not exist.

    Best-effort cleanup: failures while deleting are swallowed, and nothing
    is raised for a missing or empty path. The path is handled directly
    rather than through a shell, so whitespace or shell metacharacters in it
    cannot redirect the deletion to other targets.

    Args:
        pdf_res_path: Directory (or file) holding previous magic-pdf output.
    """
    if not pdf_res_path:
        # No path given: nothing to delete.
        return
    if os.path.isdir(pdf_res_path) and not os.path.islink(pdf_res_path):
        shutil.rmtree(pdf_res_path, ignore_errors=True)
    elif os.path.lexists(pdf_res_path):
        # Plain file or symlink: remove the entry itself, not its target.
        try:
            os.remove(pdf_res_path)
        except OSError:
            # Cleanup is deliberately best-effort.
            pass