"vscode:/vscode.git/clone" did not exist on "5200753a609bc08a03ee6fd6b608149adc8c625c"
Commit b492c19c authored by icecraft
Browse files

refactor: move some constants or enums defs to config folder

parent bc992433
from loguru import logger from loguru import logger
from magic_pdf.libs.drop_reason import DropReason from magic_pdf.config.drop_reason import DropReason
def get_data_source(jso: dict): def get_data_source(jso: dict):
data_source = jso.get("data_source") data_source = jso.get('data_source')
if data_source is None: if data_source is None:
data_source = jso.get("file_source") data_source = jso.get('file_source')
return data_source return data_source
def get_data_type(jso: dict): def get_data_type(jso: dict):
data_type = jso.get("data_type") data_type = jso.get('data_type')
if data_type is None: if data_type is None:
data_type = jso.get("file_type") data_type = jso.get('file_type')
return data_type return data_type
def get_bookid(jso: dict): def get_bookid(jso: dict):
book_id = jso.get("bookid") book_id = jso.get('bookid')
if book_id is None: if book_id is None:
book_id = jso.get("original_file_id") book_id = jso.get('original_file_id')
return book_id return book_id
def exception_handler(jso: dict, e): def exception_handler(jso: dict, e):
logger.exception(e) logger.exception(e)
jso["_need_drop"] = True jso['_need_drop'] = True
jso["_drop_reason"] = DropReason.Exception jso['_drop_reason'] = DropReason.Exception
jso["_exception"] = f"ERROR: {e}" jso['_exception'] = f'ERROR: {e}'
return jso return jso
def get_bookname(jso: dict): def get_bookname(jso: dict):
data_source = get_data_source(jso) data_source = get_data_source(jso)
file_id = jso.get("file_id") file_id = jso.get('file_id')
book_name = f"{data_source}/{file_id}" book_name = f'{data_source}/{file_id}'
return book_name return book_name
def spark_json_extractor(jso: dict) -> dict: def spark_json_extractor(jso: dict) -> dict:
""" """从json中提取数据,返回一个dict."""
从json中提取数据,返回一个dict
"""
return { return {
"_pdf_type": jso["_pdf_type"], '_pdf_type': jso['_pdf_type'],
"model_list": jso["doc_layout_result"], 'model_list': jso['doc_layout_result'],
} }
...@@ -7,10 +7,10 @@ import fitz ...@@ -7,10 +7,10 @@ import fitz
from loguru import logger from loguru import logger
import magic_pdf.model as model_config import magic_pdf.model as model_config
from magic_pdf.config.make_content_config import DropMode, MakeMode
from magic_pdf.data.data_reader_writer import FileBasedDataWriter from magic_pdf.data.data_reader_writer import FileBasedDataWriter
from magic_pdf.libs.draw_bbox import (draw_layout_bbox, draw_line_sort_bbox, from magic_pdf.libs.draw_bbox import (draw_layout_bbox, draw_line_sort_bbox,
draw_model_bbox, draw_span_bbox) draw_model_bbox, draw_span_bbox)
from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode
from magic_pdf.pipe.OCRPipe import OCRPipe from magic_pdf.pipe.OCRPipe import OCRPipe
from magic_pdf.pipe.TXTPipe import TXTPipe from magic_pdf.pipe.TXTPipe import TXTPipe
from magic_pdf.pipe.UNIPipe import UNIPipe from magic_pdf.pipe.UNIPipe import UNIPipe
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment