config_reader.py 3.85 KB
Newer Older
kernel.h@qq.com's avatar
kernel.h@qq.com committed
1
2
3
4
"""
Return the S3 (AK, SK, endpoint) triple that corresponds to a bucket name.

"""
许瑞's avatar
许瑞 committed
5

6
7
8
9
10
import json
import os

from loguru import logger

11
from magic_pdf.libs.Constants import MODEL_NAME
12
13
from magic_pdf.libs.commons import parse_bucket_key

14
15
16
# Name of the configuration file, looked up in the user's home directory.
CONFIG_FILE_NAME = "magic-pdf.json"

kernel.h@qq.com's avatar
kernel.h@qq.com committed
17

许瑞's avatar
许瑞 committed
18
def read_config():
    """Load the JSON configuration file from the current user's home directory.

    The file is expected at ``~/<CONFIG_FILE_NAME>`` and is read as UTF-8.

    Returns:
        The parsed configuration, exactly as produced by ``json.load``.

    Raises:
        FileNotFoundError: If the configuration file does not exist.
    """
    config_path = os.path.join(os.path.expanduser("~"), CONFIG_FILE_NAME)

    if os.path.exists(config_path):
        with open(config_path, "r", encoding="utf-8") as config_fp:
            return json.load(config_fp)

    raise FileNotFoundError(f"{config_path} not found")


def get_s3_config(bucket_name: str):
    """Look up the S3 credentials for a bucket in the configuration file.

    The ``bucket_info`` section of the configuration is indexed by bucket
    name; each entry unpacks into three values: access key, secret key and
    storage endpoint. A bucket without an entry of its own falls back to the
    ``"[default]"`` entry.

    Args:
        bucket_name: Name of the bucket whose credentials are wanted.

    Returns:
        A ``(access_key, secret_key, storage_endpoint)`` tuple.

    Raises:
        FileNotFoundError: If the configuration file does not exist.
        KeyError: If neither ``bucket_name`` nor ``"[default]"`` is configured.
        Exception: If the ``bucket_info`` section is missing, or if any of
            the three credential values is null.
    """
    config = read_config()

    bucket_info = config.get("bucket_info")
    if bucket_info is None:
        # Fail with a clear message instead of the opaque
        # "argument of type 'NoneType' is not iterable" raised by ``in``.
        raise Exception(f"'bucket_info' not found in {CONFIG_FILE_NAME}")

    lookup_name = bucket_name if bucket_name in bucket_info else "[default]"
    if lookup_name not in bucket_info:
        # Only reachable when the fallback entry itself is absent.
        raise KeyError(
            f"neither '{bucket_name}' nor '[default]' found in 'bucket_info' of {CONFIG_FILE_NAME}"
        )

    access_key, secret_key, storage_endpoint = bucket_info[lookup_name]

    if access_key is None or secret_key is None or storage_endpoint is None:
        raise Exception(f"ak, sk or endpoint not found in {CONFIG_FILE_NAME}")

    return access_key, secret_key, storage_endpoint
49
50


51
52
53
54
55
56
57
58
59
60
def get_s3_config_dict(path: str):
    """Return the S3 credentials for the bucket referenced by ``path`` as a dict.

    Args:
        path: A path from which ``get_bucket_name`` can extract the bucket.

    Returns:
        A dict with the keys ``"ak"``, ``"sk"`` and ``"endpoint"``.
    """
    bucket = get_bucket_name(path)
    ak, sk, endpoint = get_s3_config(bucket)
    return {
        "ak": ak,
        "sk": sk,
        "endpoint": endpoint,
    }


def get_bucket_name(path):
    """Return the bucket component of ``path``.

    The path is split by ``parse_bucket_key``, which yields a
    ``(bucket, key)`` pair; the key part is discarded.
    """
    bucket_name, _key = parse_bucket_key(path)
    return bucket_name


61
62
def get_local_models_dir():
    """Return the local models directory configured under ``models-dir``.

    When the key is absent (or null) a warning is logged and
    ``"/tmp/models"`` is returned instead.
    """
    configured_dir = read_config().get("models-dir")
    if configured_dir is not None:
        return configured_dir

    logger.warning(f"'models-dir' not found in {CONFIG_FILE_NAME}, use '/tmp/models' as default")
    return "/tmp/models"
69
70


71
72
73
74
75
76
77
78
79
80
81
82
def get_local_layoutreader_model_dir():
    """Return the directory that holds the layoutreader model.

    The ``layoutreader-model-dir`` config value is used when it is set and
    the path exists on disk. Otherwise a warning is logged and the
    ModelScope cache location under the user's home directory is returned.
    """
    configured_dir = read_config().get("layoutreader-model-dir")
    if configured_dir is not None and os.path.exists(configured_dir):
        return configured_dir

    fallback_dir = os.path.join(
        os.path.expanduser("~"),
        ".cache/modelscope/hub/ppaanngggg/layoutreader",
    )
    logger.warning(f"'layoutreader-model-dir' not exists, use {fallback_dir} as default")
    return fallback_dir


83
84
def get_device():
    """Return the device mode configured under ``device-mode``.

    When the key is absent (or null) a warning is logged and ``"cpu"`` is
    returned instead.
    """
    device_mode = read_config().get("device-mode")
    if device_mode is not None:
        return device_mode

    logger.warning(f"'device-mode' not found in {CONFIG_FILE_NAME}, use 'cpu' as default")
    return "cpu"
91

92

93
94
95
def get_table_recog_config():
    """Return the table-recognition settings configured under ``table-config``.

    When the key is absent (or null) a warning is logged and a default is
    returned: the ``MODEL_NAME.TABLE_MASTER`` model, disabled, with
    ``max_time`` set to 400.

    Returns:
        The configured value, or a fresh default dict with the keys
        ``"model"``, ``"enable"`` and ``"max_time"``.
    """
    table_config = read_config().get("table-config")
    if table_config is not None:
        return table_config

    logger.warning(f"'table-config' not found in {CONFIG_FILE_NAME}, use 'False' as default")
    # Built as a dict literal rather than by parsing interpolated JSON text,
    # so a model name containing quotes or backslashes cannot break it.
    return {
        "model": f"{MODEL_NAME.TABLE_MASTER}",
        "enable": False,
        "max_time": 400,
    }
101

102

103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def get_layout_config():
    """Return the layout-model settings configured under ``layout-config``.

    When the key is absent (or null) a warning is logged and a default that
    selects the ``MODEL_NAME.LAYOUTLMv3`` model is returned.

    Returns:
        The configured value, or a fresh default dict with the key ``"model"``.
    """
    layout_config = read_config().get("layout-config")
    if layout_config is not None:
        return layout_config

    logger.warning(f"'layout-config' not found in {CONFIG_FILE_NAME}, use '{MODEL_NAME.LAYOUTLMv3}' as default")
    # Built as a dict literal rather than by parsing interpolated JSON text,
    # so a model name containing quotes or backslashes cannot break it.
    return {"model": f"{MODEL_NAME.LAYOUTLMv3}"}


def get_formula_config():
    """Return the formula settings configured under ``formula-config``.

    When the key is absent (or null) a warning is logged and a default is
    returned: ``MODEL_NAME.YOLO_V8_MFD`` as ``mfd_model``,
    ``MODEL_NAME.UniMerNet_v2_Small`` as ``mfr_model``, enabled.

    Returns:
        The configured value, or a fresh default dict with the keys
        ``"mfd_model"``, ``"mfr_model"`` and ``"enable"``.
    """
    formula_config = read_config().get("formula-config")
    if formula_config is not None:
        return formula_config

    logger.warning(f"'formula-config' not found in {CONFIG_FILE_NAME}, use 'True' as default")
    # Built as a dict literal rather than by parsing interpolated JSON text,
    # so a model name containing quotes or backslashes cannot break it.
    return {
        "mfd_model": f"{MODEL_NAME.YOLO_V8_MFD}",
        "mfr_model": f"{MODEL_NAME.UniMerNet_v2_Small}",
        "enable": True,
    }


许瑞's avatar
许瑞 committed
123
if __name__ == "__main__":
    # Manual smoke check: resolve the credentials for the "llm-raw" bucket.
    # The values are only bound, never printed.
    ak, sk, endpoint = get_s3_config(bucket_name="llm-raw")