Commit ec85af39 authored by myhloli
Browse files

fix: add error handling for block parsing in vlm_magic_model.py

parent b40c4327
import re
from typing import Literal
from loguru import logger
from mineru.utils.boxbase import bbox_distance, is_in
from mineru.utils.enum_class import ContentType, BlockType, SplitFlag
from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text
......@@ -22,6 +24,7 @@ class MagicModel:
# 解析每个块
for index, block_info in enumerate(block_infos):
block_bbox = block_info[0].strip()
try:
x1, y1, x2, y2 = map(int, block_bbox.split())
x_1, y_1, x_2, y_2 = (
int(x1 * width / 1000),
......@@ -41,6 +44,10 @@ class MagicModel:
# print(f"类型: {block_type}")
# print(f"内容: {block_content}")
# print("-" * 50)
except Exception as e:
# 如果解析失败,可能是因为格式不正确,跳过这个块
logger.warning(f"Invalid block format: {block_info}, error: {e}")
continue
span_type = "unknown"
if block_type in [
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment