Unverified commit 4331b837, authored by Xiaomeng Zhao, committed by GitHub
Browse files

fix(gradio): remove unused imports and simplify pdf display (#534)

Removed the previously used gradio and gradio-pdf imports which were not leveraged in the code. Also,
replaced the custom `show_pdf` function with direct use of the `PDF` component from gradio for a simpler
and more integrated PDF upload and display solution, improving code maintainability and readability.
parent 7d9221d6
...@@ -7,7 +7,6 @@ import zipfile ...@@ -7,7 +7,6 @@ import zipfile
from pathlib import Path from pathlib import Path
import re import re
import gradio as gr
from loguru import logger from loguru import logger
from magic_pdf.libs.hash_utils import compute_sha256 from magic_pdf.libs.hash_utils import compute_sha256
...@@ -15,6 +14,11 @@ from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter ...@@ -15,6 +14,11 @@ from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
from magic_pdf.tools.common import do_parse, prepare_env from magic_pdf.tools.common import do_parse, prepare_env
# Best-effort install of the UI dependencies before they are imported below.
# Invoking pip as `<this interpreter> -m pip` guarantees the packages land in
# the environment that is actually running this script; a bare `pip` resolved
# through the shell's PATH may belong to a different Python installation.
# check=False keeps the original behaviour of ignoring a failed install (the
# import that follows will then raise if the package is genuinely missing).
import subprocess
import sys

for _package in ("gradio", "gradio-pdf"):
    subprocess.run([sys.executable, "-m", "pip", "install", _package], check=False)
import gradio as gr
from gradio_pdf import PDF
def read_fn(path): def read_fn(path):
disk_rw = DiskReaderWriter(os.path.dirname(path)) disk_rw = DiskReaderWriter(os.path.dirname(path))
...@@ -104,42 +108,60 @@ def to_markdown(file_path, end_pages): ...@@ -104,42 +108,60 @@ def to_markdown(file_path, end_pages):
# 返回转换后的PDF路径 # 返回转换后的PDF路径
new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf") new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
return md_content, txt_content, archive_zip_path, show_pdf(new_pdf_path) return md_content, txt_content, archive_zip_path, new_pdf_path
def show_pdf(file_path): # def show_pdf(file_path):
with open(file_path, "rb") as f: # with open(file_path, "rb") as f:
base64_pdf = base64.b64encode(f.read()).decode('utf-8') # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" ' \ # pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" ' \
f'width="100%" height="1000" type="application/pdf">' # f'width="100%" height="1000" type="application/pdf">'
return pdf_display # return pdf_display
latex_delimiters = [{"left": "$$", "right": "$$", "display": True}, latex_delimiters = [{"left": "$$", "right": "$$", "display": True},
{"left": '$', "right": '$', "display": False}] {"left": '$', "right": '$', "display": False}]
def init_model():
    """Load the text and OCR models once and report whether that succeeded.

    Asks ``ModelSingleton`` for two models -- ``get_model(False, False)`` and
    ``get_model(True, False)`` -- logging after each call. The returned model
    objects are not used here; presumably the singleton keeps them cached so
    later conversions do not pay the load cost (TODO confirm against
    ``ModelSingleton``).

    Returns:
        int: 0 when both calls complete, -1 when either raises. The exception
        is reported through ``logger.exception`` and not re-raised.
    """
    # Imported inside the function, as before. This line sits outside the
    # try block, so an ImportError propagates to the caller instead of
    # being turned into a -1 status.
    from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton

    try:
        model_manager = ModelSingleton()
        # Results are intentionally discarded: only the loading matters here.
        model_manager.get_model(False, False)
        logger.info("txt_model init final")
        model_manager.get_model(True, False)
        logger.info("ocr_model init final")
        return 0
    except Exception as e:
        logger.exception(e)
        return -1
# Initialise the models as soon as this module is loaded (this runs at module
# level, before the `if __name__ == "__main__"` guard). model_init holds the
# status code returned by init_model(): 0 on success, -1 on failure.
model_init = init_model()
logger.info(f"model_init: {model_init}")
if __name__ == "__main__": if __name__ == "__main__":
with gr.Blocks() as demo: with gr.Blocks() as demo:
with gr.Row(): with gr.Row():
with gr.Column(variant='panel', scale=5): with gr.Column(variant='panel', scale=5):
file = gr.File(label="请上传pdf", file_types=[".pdf"]) pdf_show = gr.Markdown()
max_pages = gr.Slider(1, 10, 5, step=1, label="最大转换页数") max_pages = gr.Slider(1, 10, 5, step=1, label="Max convert pages")
with gr.Row() as bu_flow: with gr.Row() as bu_flow:
change_bu = gr.Button("转换") change_bu = gr.Button("Convert")
clear_bu = gr.ClearButton([file, max_pages], value="清除") clear_bu = gr.ClearButton([pdf_show], value="Clear")
gr.Markdown(value="### PDF预览") pdf_show = PDF(label="Please upload pdf", interactive=True, height=800)
pdf_show = gr.HTML(label="PDF预览")
with gr.Column(variant='panel', scale=5): with gr.Column(variant='panel', scale=5):
output_file = gr.File(label="Markdown识别结果文件", interactive=False) output_file = gr.File(label="convert result", interactive=False)
with gr.Tabs(): with gr.Tabs():
with gr.Tab("Markdown渲染"): with gr.Tab("Markdown rendering"):
md = gr.Markdown(label="Markdown渲染", height=1100, show_copy_button=True, md = gr.Markdown(label="Markdown rendering", height=900, show_copy_button=True,
latex_delimiters=latex_delimiters, line_breaks=True) latex_delimiters=latex_delimiters, line_breaks=True)
with gr.Tab("Markdown文本"): with gr.Tab("Markdown text"):
md_text = gr.TextArea(lines=55, show_copy_button=True) md_text = gr.TextArea(lines=45, show_copy_button=True)
file.upload(fn=show_pdf, inputs=file, outputs=pdf_show) change_bu.click(fn=to_markdown, inputs=[pdf_show, max_pages], outputs=[md, md_text, output_file, pdf_show])
change_bu.click(fn=to_markdown, inputs=[file, max_pages], outputs=[md, md_text, output_file, pdf_show])
clear_bu.add([md, pdf_show, md_text, output_file]) clear_bu.add([md, pdf_show, md_text, output_file])
demo.launch() demo.launch()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment