Commit 28e8f8b8 authored by zhougaofeng
Browse files

Update cut_image.py

parent 485659df
......@@ -24,7 +24,7 @@ def ocr_image(image_path,compress_image):
logger.info(f'qwen解析{image_path}表格的内容为:{generated_text},耗时为:{end - start}')
return generated_text
def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page, page_id, pdf_bytes_md5, imageWriter):
def ocr_cut_image_and_table(config_path,local_image_dir,spans, page, page_id, pdf_bytes_md5, imageWriter):
def return_path(type):
return join_path(pdf_bytes_md5, type)
......@@ -40,6 +40,7 @@ def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page,
url = config.get('server', 'ocr_server')
workers = int(config.get('server', 'ocr_workers'))
client = PredictClient(url)
ocr_status = client.check_health()
pool = Pool(workers)
if not ocr_status:
logger.warning(f'Health check failed. The server at "{url}" is not responding as expected.')
......@@ -59,7 +60,6 @@ def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page,
imageWriter=imageWriter)
image_path = join_path(local_image_dir, image_path)
if ocr_status:
txt = pool.apply_async(ocr_image, args=(image_path,compress_image)).get()
span['image_path'] = str(txt)
else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment