Commit 4eaa85fd authored by myhloli
Browse files

refactor: update make mode constants to improve content list handling

parent c01b780b
......@@ -260,14 +260,14 @@ def union_make(pdf_info_dict: list,
if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
page_markdown = make_blocks_to_markdown(paras_of_layout, make_mode, img_buket_path)
output_content.extend(page_markdown)
-elif make_mode == MakeMode.STANDARD_FORMAT:
+elif make_mode == MakeMode.CONTENT_LIST:
for para_block in paras_of_layout:
para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx)
output_content.append(para_content)
if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
return '\n\n'.join(output_content)
-elif make_mode == MakeMode.STANDARD_FORMAT:
+elif make_mode == MakeMode.CONTENT_LIST:
return output_content
else:
logger.error(f"Unsupported make mode: {make_mode}")
......
......@@ -186,14 +186,14 @@ def union_make(pdf_info_dict: list,
if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
page_markdown = mk_blocks_to_markdown(paras_of_layout, make_mode, img_buket_path)
output_content.extend(page_markdown)
-elif make_mode == MakeMode.STANDARD_FORMAT:
+elif make_mode == MakeMode.CONTENT_LIST:
for para_block in paras_of_layout:
para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx)
output_content.append(para_content)
if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
return '\n\n'.join(output_content)
-elif make_mode == MakeMode.STANDARD_FORMAT:
+elif make_mode == MakeMode.CONTENT_LIST:
return output_content
return None
......
......@@ -143,7 +143,7 @@ def do_parse(
if f_dump_content_list:
image_dir = str(os.path.basename(local_image_dir))
-content_list = pipeline_union_make(pdf_info, MakeMode.STANDARD_FORMAT, image_dir)
+content_list = pipeline_union_make(pdf_info, MakeMode.CONTENT_LIST, image_dir)
md_writer.write_string(
f"{pdf_file_name}_content_list.json",
json.dumps(content_list, ensure_ascii=False, indent=4),
......@@ -200,7 +200,7 @@ def do_parse(
if f_dump_content_list:
image_dir = str(os.path.basename(local_image_dir))
-content_list = vlm_union_make(pdf_info, MakeMode.STANDARD_FORMAT, image_dir)
+content_list = vlm_union_make(pdf_info, MakeMode.CONTENT_LIST, image_dir)
md_writer.write_string(
f"{pdf_file_name}_content_list.json",
json.dumps(content_list, ensure_ascii=False, indent=4),
......
......@@ -42,7 +42,7 @@ class CategoryId:
class MakeMode:
MM_MD = 'mm_markdown'
NLP_MD = 'nlp_markdown'
-STANDARD_FORMAT = 'standard_format'
+CONTENT_LIST = 'content_list'
class ModelPath:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment