Update cut_image.py

c88b297c · zhougaofeng · b0aeaeaa · c88b297c
Commit c88b297c authored Nov 22, 2024 by zhougaofeng
Show whitespace changes
Inline Side-by-side

Showing with 12 additions and 9 deletions

magic_pdf/pre_proc/cut_image.py magic_pdf/pre_proc/cut_image.py +12 -9

No files found.
--- a/magic_pdf/pre_proc/cut_image.py
+++ b/magic_pdf/pre_proc/cut_image.py
@@ -8,15 +8,12 @@ from magic_pdf.libs.ocr_content_type import ContentType
 from magic_pdf.libs.pdf_image_tools import cut_image
 from multiprocessing import Pool
-# vllm：
-from magic_pdf.dict2md.ocr_vllm_client import PredictClient,compress_image
-# 普通 非vllm
-# from magic_pdf.dict2md.ocr_client import PredictClient,compress_image
 text = '解析图片内容，直接返回一段带有逻辑性的中文书面语描述，要求表达精准，不脱离图片中的实际内容，不要带换行,文中所有的名词不要用指代词'
 client = None
-def ocr_image(image_path):
+def ocr_image(image_path,compress_image):
    start = time.time()
    compress_image(image_path)
    txt = os.getpid()
@@ -31,13 +28,19 @@ def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page,
    def return_path(type):
        return join_path(pdf_bytes_md5, type)
-    pool = Pool(4)
    global client
    config = configparser.ConfigParser()
    config.read(config_path)
+    vllm_able = config.get('vllm', 'vllm_able')
+    if vllm_able:
+        from magic_pdf.dict2md.ocr_vllm_client import PredictClient, compress_image
+    else:
+        from magic_pdf.dict2md.ocr_client import PredictClient,compress_image
    url = config.get('server', 'ocr_server')
+    workers = int(config.get('server', 'ocr_workers'))
    client = PredictClient(url)
+    pool = Pool(workers)
    if not ocr_status:
        logger.warning(f'Health check failed. The server at "{url}" is not responding as expected.')
        logger.info(f'Qwen ocr解析服务无法正常运行,暂不使用qwen解析表格服务')
@@ -57,7 +60,7 @@ def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page,
            image_path = join_path(local_image_dir, image_path)
            if ocr_status:
-                txt = pool.apply_async(ocr_image, args=(image_path,)).get()
+                txt = pool.apply_async(ocr_image, args=(image_path,compress_image)).get()
                span['image_path'] = str(txt)
            else:
                span['image_path'] = f"----------------图片路径为({image_path})，请检查qwen ocr服务，重新运行文件解析-------------------  \n"