"vscode:/vscode.git/clone" did not exist on "f55a1a51a04e74ecac04e01d33343029e87b8cee"
Commit 8b714854 authored by quyuan's avatar quyuan
Browse files

add gpu ci

parent 54430119
import pytest
import os
from conf import conf
import os
import json
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
from lib import calculate_score
pdf_res_path = conf.conf["pdf_res_path"]
code_path = conf.conf["code_path"]
pdf_dev_path = conf.conf["pdf_dev_path"]
class TestCliCuda:
"""
test cli cuda
"""
def test_pdf_sdk_cuda(self):
"""
pdf sdk cuda
"""
clean_magicpdf(pdf_res_path)
pdf_to_markdown()
fr = open(os.path.join(pdf_dev_path, "result.json"), "r", encoding="utf-8")
lines = fr.readlines()
last_line = lines[-1].strip()
last_score = json.loads(last_line)
last_simscore = last_score["average_sim_score"]
last_editdistance = last_score["average_edit_distance"]
last_bleu = last_score["average_bleu_score"]
os.system(f"python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
now_score = get_score()
print ("now_score:", now_score)
if not os.path.exists(os.path.join(pdf_dev_path, "ci")):
os.makedirs(os.path.join(pdf_dev_path, "ci"), exist_ok=True)
fw = open(os.path.join(pdf_dev_path, "ci", "result.json"), "w+", encoding="utf-8")
fw.write(json.dumps(now_score) + "\n")
now_simscore = now_score["average_sim_score"]
now_editdistance = now_score["average_edit_distance"]
now_bleu = now_score["average_bleu_score"]
assert last_simscore <= now_simscore
assert last_editdistance <= now_editdistance
assert last_bleu <= now_bleu
def pdf_to_markdown():
"""
pdf to md
"""
demo_names = list()
pdf_path = os.path.join(pdf_dev_path, "pdf")
for pdf_file in os.listdir(pdf_path):
if pdf_file.endswith('.pdf'):
demo_names.append(pdf_file.split('.')[0])
for demo_name in demo_names:
pdf_path = os.path.join(pdf_dev_path, "pdf", f"{demo_name}.pdf")
cmd = "magic-pdf pdf-command --pdf %s --inside_model true" % (pdf_path)
os.system(cmd)
dir_path = os.path.join(pdf_dev_path, "mineru")
if not os.path.exists(dir_path):
os.makedirs(dir_path, exist_ok=True)
res_path = os.path.join(dir_path, f"{demo_name}.md")
#src_path = os.path.join(pdf_res_path, "pdf", f"{demo_name}.pdf")
#shutil.copy(src_path, res_path)
def get_score():
"""
get score
"""
score = calculate_score.Scoring(os.path.join(pdf_dev_path, "result.json"))
score.calculate_similarity_total("mineru", pdf_dev_path)
res = score.summary_scores()
return res
def clean_magicpdf(pdf_res_path):
"""
clean magicpdf
"""
cmd = "rm -rf %s" % (pdf_res_path)
os.system(cmd)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment