# benchmark.py
import zipfile
import os
import shutil
# Directory the shell commands below `cd` into before running project scripts.
# Read from the GITHUB_WORKSPACE environment variable; None when it is unset.
code_path = os.environ.get('GITHUB_WORKSPACE')
# Directory searched for input PDFs; also holds output.zip, the "output"
# folder, and is passed to the scoring tools as --download_dir.
pdf_dev_path = "/home/quyuan/data"
# Directory test_cli reads the generated Markdown results from.
# NOTE(review): the repeated "Magic-PDF" segments look machine-specific — confirm.
pdf_res_path = "/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci/magic-pdf"
def test_cli():
    """Run the magic-pdf CLI over the dev PDFs and collect the generated Markdown.

    Every ``*.pdf`` file found under ``pdf_dev_path`` is passed to
    ``magic_pdf/cli/magicpdf.py pdf-command`` (executed from ``code_path``).
    Afterwards, for each ``*.pdf`` entry in ``<pdf_dev_path>/output``, the file
    ``<pdf_res_path>/<entry>/auto/<entry>.md`` is copied, when it exists, to
    ``<pdf_dev_path>/<prefix>/<entry>.md``, where ``prefix`` is the
    second-to-last ``_``-separated token of the entry name.

    The exit status of the shell command is not checked.
    """
    cmd = 'cd %s && export PYTHONPATH=. && find %s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py  pdf-command  --pdf {}' % (code_path, pdf_dev_path)
    os.system(cmd)

    output_dir = os.path.join(pdf_dev_path, "output")
    os.makedirs(output_dir, exist_ok=True)

    for annotation_name in os.listdir(output_dir):
        if not annotation_name.endswith('.pdf'):
            continue

        # NOTE(review): the result directory and Markdown file are looked up
        # under the full entry name (including ".pdf"), as the original path
        # expressions did — confirm this matches what the CLI actually writes.
        md_name = annotation_name + ".md"
        source_md = os.path.join(pdf_res_path, annotation_name, "auto", md_name)
        # Check for the real file on disk instead of searching the path string
        # for ".md", and never rebind the module-level pdf_res_path.
        if not os.path.isfile(source_md):
            continue

        name_parts = annotation_name.split('_')
        if len(name_parts) < 2:
            # No "_" in the name, so there is no second-to-last token to use.
            print(f'Skipping {annotation_name}: cannot derive prefix')
            continue

        target_dir = os.path.join(pdf_dev_path, name_parts[-2])
        # Create the prefix directory on demand, but copy on every match so
        # several files sharing one prefix are all collected.
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(source_md, os.path.join(target_dir, md_name))
                   

def calculate_score():
    """Run the photo-cleaning and Markdown scoring tools from ``code_path``.

    Three shell commands are executed in order via ``os.system``:
    ``tools/clean_photo.py`` for tool name ``annotations``, the same script for
    tool name ``magicpdf``, and ``tools/markdown_calculate.py`` for tool name
    ``pdf-command`` with ``--results`` pointing at
    ``<pdf_dev_path>/result.json``. Exit statuses are not inspected.
    """
    results_file = os.path.join(pdf_dev_path, "result.json")
    # (command template, format arguments) pairs, in execution order.
    steps = (
        (
            "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir %s",
            (code_path, pdf_dev_path),
        ),
        (
            "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s",
            (code_path, pdf_dev_path),
        ),
        (
            "cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name pdf-command --download_dir %s --results %s",
            (code_path, pdf_dev_path, results_file),
        ),
    )
    for template, arguments in steps:
        os.system(template % arguments)


def extrat_zip(zip_file_path, extract_to_path):
    """Extract ``zip_file_path`` into ``extract_to_path`` if it is a zip archive.

    Prints a confirmation after extracting. When ``zipfile.is_zipfile`` rejects
    the path, nothing is extracted and a message saying so is printed instead.
    Returns ``None`` in both cases.
    """
    if not zipfile.is_zipfile(zip_file_path):
        print(f'{zip_file_path} is not a zip file')
        return

    with zipfile.ZipFile(zip_file_path, 'r') as archive:
        archive.extractall(extract_to_path)
    print(f'Files extracted to {extract_to_path}')


if __name__ == "__main__":
    # Unpack the archive first, then run the CLI and score its output.
    archive_path = os.path.join(pdf_dev_path, 'output.zip')
    datasets_dir = os.path.join(pdf_dev_path,'datasets')
    extrat_zip(archive_path, datasets_dir)
    test_cli()
    calculate_score()