"docs/zh/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "4b1c76864cfed56b7dcf501f5715c85193f65134"
Commit 28e8f8b8 authored by zhougaofeng's avatar zhougaofeng
Browse files

Update cut_image.py

parent 485659df
...@@ -24,7 +24,7 @@ def ocr_image(image_path,compress_image): ...@@ -24,7 +24,7 @@ def ocr_image(image_path,compress_image):
logger.info(f'qwen解析{image_path}表格的内容为:{generated_text},耗时为:{end - start}') logger.info(f'qwen解析{image_path}表格的内容为:{generated_text},耗时为:{end - start}')
return generated_text return generated_text
def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page, page_id, pdf_bytes_md5, imageWriter): def ocr_cut_image_and_table(config_path,local_image_dir,spans, page, page_id, pdf_bytes_md5, imageWriter):
def return_path(type): def return_path(type):
return join_path(pdf_bytes_md5, type) return join_path(pdf_bytes_md5, type)
...@@ -40,6 +40,7 @@ def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page, ...@@ -40,6 +40,7 @@ def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page,
url = config.get('server', 'ocr_server') url = config.get('server', 'ocr_server')
workers = int(config.get('server', 'ocr_workers')) workers = int(config.get('server', 'ocr_workers'))
client = PredictClient(url) client = PredictClient(url)
ocr_status = client.check_health()
pool = Pool(workers) pool = Pool(workers)
if not ocr_status: if not ocr_status:
logger.warning(f'Health check failed. The server at "{url}" is not responding as expected.') logger.warning(f'Health check failed. The server at "{url}" is not responding as expected.')
...@@ -59,7 +60,6 @@ def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page, ...@@ -59,7 +60,6 @@ def ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,spans, page,
imageWriter=imageWriter) imageWriter=imageWriter)
image_path = join_path(local_image_dir, image_path) image_path = join_path(local_image_dir, image_path)
if ocr_status: if ocr_status:
txt = pool.apply_async(ocr_image, args=(image_path,compress_image)).get() txt = pool.apply_async(ocr_image, args=(image_path,compress_image)).get()
span['image_path'] = str(txt) span['image_path'] = str(txt)
else: else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment