"...promptdiffusion/pipeline_prompt_diffusion.py" did not exist on "b7b1a30bc49cad350c7a642e1171e886d83cd909"
test_bench_gpu.py 2.74 KB
Newer Older
quyuan's avatar
quyuan committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pytest
import os
from conf import conf
import os
import json
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
from lib import calculate_score

# Paths read from the shared test configuration (conf.conf).
pdf_res_path = conf.conf["pdf_res_path"]  # removed by clean_magicpdf() at the start of the test
code_path = conf.conf["code_path"]  # not referenced by the code visible in this file
pdf_dev_path = conf.conf["pdf_dev_path"]  # root that holds pdf/, mineru/, ci/ and result.json
class TestCliCuda:
    """
    Benchmark regression test for the magic-pdf CLI (CUDA variant, per the class name).
    """

    def test_pdf_sdk_cuda(self):
        """
        Convert the dev PDFs and compare the new scores with the recorded baseline.

        Steps:
          1. Remove the previous result tree and run the CLI on every dev PDF.
          2. Load the baseline scores from the last non-empty line of
             ``<pdf_dev_path>/result.json`` (one JSON object per line).
          3. Run the pre-clean script, compute the current scores and store
             them in ``<pdf_dev_path>/ci/result.json``.
          4. Assert that none of the three averaged metrics fell below the
             baseline value.
        """
        clean_magicpdf(pdf_res_path)
        pdf_to_markdown()

        # Read the baseline *before* get_score() runs: get_score() is handed
        # the same result.json path.
        baseline_path = os.path.join(pdf_dev_path, "result.json")
        with open(baseline_path, "r", encoding="utf-8") as fr:
            # Ignore blank lines so a trailing empty line does not break parsing.
            records = [line.strip() for line in fr if line.strip()]
        assert records, f"no baseline scores found in {baseline_path}"
        last_score = json.loads(records[-1])
        last_simscore = last_score["average_sim_score"]
        last_editdistance = last_score["average_edit_distance"]
        last_bleu = last_score["average_bleu_score"]

        os.system(f"python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
        now_score = get_score()
        print("now_score:", now_score)

        # Persist the fresh scores before asserting, so they are flushed to
        # disk even when one of the comparisons below fails.
        ci_dir = os.path.join(pdf_dev_path, "ci")
        os.makedirs(ci_dir, exist_ok=True)
        with open(os.path.join(ci_dir, "result.json"), "w", encoding="utf-8") as fw:
            fw.write(json.dumps(now_score) + "\n")

        now_simscore = now_score["average_sim_score"]
        now_editdistance = now_score["average_edit_distance"]
        now_bleu = now_score["average_bleu_score"]
        assert last_simscore <= now_simscore
        # NOTE(review): if "average_edit_distance" is a distance (lower is
        # better) this comparison is inverted -- confirm against the scoring
        # library before changing it.
        assert last_editdistance <= now_editdistance
        assert last_bleu <= now_bleu

def pdf_to_markdown():
    """
    Run the ``magic-pdf`` CLI on every ``.pdf`` file in ``<pdf_dev_path>/pdf``.

    Each file is processed with
    ``magic-pdf pdf-command --pdf <file> --inside_model true``. The exit
    status of the command is not checked. The ``<pdf_dev_path>/mineru``
    directory is created if it does not exist yet.
    """
    import shlex  # local import: only needed to quote the shell argument

    pdf_dir = os.path.join(pdf_dev_path, "pdf")
    # Keep the full file name instead of cutting at the first dot, so that a
    # name such as "paper.v2.pdf" still points at the real file. Sorting
    # makes the processing order deterministic.
    pdf_files = sorted(name for name in os.listdir(pdf_dir) if name.endswith(".pdf"))

    # The directory does not depend on the file being processed: create it once.
    os.makedirs(os.path.join(pdf_dev_path, "mineru"), exist_ok=True)

    for pdf_file in pdf_files:
        pdf_path = os.path.join(pdf_dir, pdf_file)
        # Quote the path so spaces or shell metacharacters cannot split it.
        cmd = f"magic-pdf pdf-command --pdf {shlex.quote(pdf_path)} --inside_model true"
        os.system(cmd)



def get_score():
    """
    Compute the current benchmark scores.

    Builds a ``calculate_score.Scoring`` object for
    ``<pdf_dev_path>/result.json``, runs the similarity calculation for the
    "mineru" tool over ``pdf_dev_path`` and returns the value produced by
    ``summary_scores()``.
    """
    result_file = os.path.join(pdf_dev_path, "result.json")
    scorer = calculate_score.Scoring(result_file)
    scorer.calculate_similarity_total("mineru", pdf_dev_path)
    return scorer.summary_scores()


def clean_magicpdf(pdf_res_path):
    """
    Remove a previous magic-pdf result tree (best-effort ``rm -rf``).

    The path is deleted without going through a shell, so paths containing
    spaces or shell metacharacters are handled correctly. Missing paths and
    deletion errors are ignored, as they were when the exit status of
    ``rm -rf`` was discarded.

    Args:
        pdf_res_path: file or directory to delete. ``~`` and environment
            variables are expanded, as the shell used to do.
    """
    import shutil  # local import: the module is not imported at file level

    if not pdf_res_path:
        # "rm -rf" without an operand removes nothing; do the same.
        return

    target = os.path.expandvars(os.path.expanduser(pdf_res_path))
    if os.path.isdir(target) and not os.path.islink(target):
        shutil.rmtree(target, ignore_errors=True)
        return

    # Regular file, symlink or missing path.
    try:
        os.remove(target)
    except OSError:
        # Stay silent like "rm -f": cleanup is best-effort.
        pass