test_bench.py 2.34 KB
Newer Older
quyuan's avatar
add ci  
quyuan committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
bench
"""
import os
import shutil
import json
from lib import calculate_score
import pytest
from conf import conf

# Value of the GITHUB_WORKSPACE environment variable; None when it is unset.
# NOTE(review): presumably the repository checkout root on the CI runner - confirm.
code_path = os.environ.get('GITHUB_WORKSPACE')
# Base directory of the benchmark data: this module reads "result.json" from it
# and uses its "ci" and "mineru" sub-directories.
pdf_dev_path = conf.conf["pdf_dev_path"]
# Directory that test_cli() removes, recreates and then scans for ".md" files.
pdf_res_path = conf.conf["pdf_res_path"]

class TestBench():
    """
    Benchmark regression check: compares freshly computed scores with the
    last scores recorded in <pdf_dev_path>/result.json.
    """

    # Keys compared between the recorded baseline and the new scores.
    _METRIC_KEYS = (
        "average_sim_score",
        "average_edit_distance",
        "average_bleu_score",
    )

    @staticmethod
    def _load_last_score(result_path):
        """
        Return the JSON object stored on the last non-blank line of result_path.

        Returns None when the file cannot be opened or contains no non-blank
        line, so the caller can tell "no baseline yet" from a real score.
        A non-blank last line that is not valid JSON still raises
        json.JSONDecodeError.
        """
        try:
            # "with" guarantees the handle is closed even if reading fails.
            with open(result_path, "r", encoding="utf-8") as fr:
                records = [line.strip() for line in fr if line.strip()]
        except IOError:
            print("result.json not exist")
            return None
        if not records:
            print("result.json is empty")
            return None
        return json.loads(records[-1])

    def ci_ben(self):
        """
        ci benchmark

        Loads the baseline, runs test_cli(), runs lib/pre_clean.py for the
        "mineru" tool, computes the current scores with get_score() and
        asserts that none of the compared metrics dropped below the baseline.
        When no baseline is available the pipeline still runs, but the
        comparison is skipped.

        Raises:
            AssertionError: a current metric is lower than its baseline.
            KeyError: a compared metric is missing from either score dict.
        """
        last_score = TestBench._load_last_score(
            os.path.join(pdf_dev_path, "result.json"))
        baseline = None
        if last_score is not None:
            print("last_score:", last_score)
            # Pull the metrics out now so a malformed baseline fails before
            # the expensive conversion run, not after it.
            baseline = {key: last_score[key] for key in TestBench._METRIC_KEYS}

        test_cli()

        os.system(f"python lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
        now_score = get_score()
        print("now_score:", now_score)
        current = {key: now_score[key] for key in TestBench._METRIC_KEYS}

        if baseline is None:
            # Without a baseline there is nothing to compare against; the old
            # code crashed with NameError on the asserts at this point.
            print("no baseline score available, skip regression comparison")
            return

        # NOTE(review): every metric, including average_edit_distance, is
        # required not to decrease. If a smaller edit distance means better
        # output, that comparison is inverted - confirm against calculate_score.
        for key in TestBench._METRIC_KEYS:
            assert baseline[key] <= current[key], (
                f"{key} regressed: last={baseline[key]} now={current[key]}")


def get_score():
    """
    Compute and return the summary scores for the "mineru" tool.

    Creates a calculate_score.Scoring for <pdf_dev_path>/ci/result.json,
    calls calculate_similarity_total("mineru", <pdf_dev_path>/ci) on it and
    returns whatever summary_scores() produces.
    """
    ci_dir = os.path.join(pdf_dev_path, "ci")
    result_file = os.path.join(ci_dir, "result.json")
    scorer = calculate_score.Scoring(result_file)
    scorer.calculate_similarity_total("mineru", ci_dir)
    return scorer.summary_scores()

def test_cli():
    """
    test pdf-command cli

    Empties pdf_res_path, runs "magic-pdf pdf-command" on
    <pdf_dev_path>/mineru and then copies every ".md" file found anywhere
    under pdf_res_path directly into <pdf_dev_path>/mineru. Files with the
    same name in different sub-directories overwrite each other.
    """
    # Start from an empty result directory. rmtree replaces the former
    # "rm -rf" shell call, so the path is never interpreted by a shell.
    shutil.rmtree(pdf_res_path, ignore_errors=True)
    os.makedirs(pdf_res_path, exist_ok=True)

    target_dir = os.path.join(pdf_dev_path, "mineru")
    # NOTE(review): the command is still a shell string; a pdf_dev_path
    # containing spaces or shell metacharacters would break it.
    # Presumably magic-pdf writes its output under pdf_res_path - confirm.
    cmd = f'magic-pdf pdf-command --pdf {target_dir}'
    status = os.system(cmd)
    if status != 0:
        # Report the failure without aborting; the old code ignored it.
        print(f"magic-pdf exited with status {status}")

    for root, _, files in os.walk(pdf_res_path):
        for magic_file in files:
            if not magic_file.endswith(".md"):
                continue
            # Created lazily so the directory only appears when there is
            # something to copy, as before.
            os.makedirs(target_dir, exist_ok=True)
            shutil.copy(os.path.join(root, magic_file),
                        os.path.join(target_dir, magic_file))