"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "c7260ce253e52f1d6402027055a6371cb6114118"
Commit 54ce594b authored by myhloli's avatar myhloli
Browse files

refactor(tools): improve code readability and maintainability

- Remove unnecessary line breaks and adjust indentation
- Update function call to use named arguments for better readability
- Modify _do_parse function call to use MakeMode.MM_MD instead of
parent d2fc9dab
...@@ -109,9 +109,7 @@ def _do_parse( ...@@ -109,9 +109,7 @@ def _do_parse(
pdf_bytes = ds._raw_data pdf_bytes = ds._raw_data
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method) local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter( image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
local_md_dir
)
image_dir = str(os.path.basename(local_image_dir)) image_dir = str(os.path.basename(local_image_dir))
if len(model_list) == 0: if len(model_list) == 0:
...@@ -317,7 +315,26 @@ def batch_do_parse( ...@@ -317,7 +315,26 @@ def batch_do_parse(
infer_results = batch_doc_analyze(dss, parse_method, lang=lang, layout_model=layout_model, formula_enable=formula_enable, table_enable=table_enable) infer_results = batch_doc_analyze(dss, parse_method, lang=lang, layout_model=layout_model, formula_enable=formula_enable, table_enable=table_enable)
for idx, infer_result in enumerate(infer_results): for idx, infer_result in enumerate(infer_results):
_do_parse(output_dir, pdf_file_names[idx], dss[idx], infer_result.get_infer_res(), parse_method, debug_able, f_draw_span_bbox=f_draw_span_bbox, f_draw_layout_bbox=f_draw_layout_bbox, f_dump_md=f_dump_md, f_dump_middle_json=f_dump_middle_json, f_dump_model_json=f_dump_model_json, f_dump_orig_pdf=f_dump_orig_pdf, f_dump_content_list=f_dump_content_list, f_make_md_mode=f_make_md_mode, f_draw_model_bbox=f_draw_model_bbox, f_draw_line_sort_bbox=f_draw_line_sort_bbox, f_draw_char_bbox=f_draw_char_bbox, lang=lang) _do_parse(
output_dir = output_dir,
pdf_file_name = pdf_file_names[idx],
pdf_bytes_or_dataset = dss[idx],
model_list = infer_result.get_infer_res(),
parse_method = parse_method,
debug_able = debug_able,
f_draw_span_bbox = f_draw_span_bbox,
f_draw_layout_bbox = f_draw_layout_bbox,
f_dump_md=f_dump_md,
f_dump_middle_json=f_dump_middle_json,
f_dump_model_json=f_dump_model_json,
f_dump_orig_pdf=f_dump_orig_pdf,
f_dump_content_list=f_dump_content_list,
f_make_md_mode=MakeMode.MM_MD,
f_draw_model_bbox=f_draw_model_bbox,
f_draw_line_sort_bbox=f_draw_line_sort_bbox,
f_draw_char_bbox=f_draw_char_bbox,
lang=lang,
)
parse_pdf_methods = click.Choice(['ocr', 'txt', 'auto']) parse_pdf_methods = click.Choice(['ocr', 'txt', 'auto'])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment