#!/usr/bin/env python
from huggingface_hub import snapshot_download

if __name__ == "__main__":
    # Imported locally so this script block carries its own dependency.
    import os

    # Glob patterns selecting which files of the PDF-Extract-Kit repository
    # are fetched. The commented-out entries are model sets that are
    # currently not downloaded.
    mineru_patterns = [
        # "models/Layout/LayoutLMv3/*",
        "models/Layout/YOLO/*",
        "models/MFD/YOLO/*",
        "models/MFR/unimernet_hf_small_2503/*",
        # "models/TabRec/TableMaster/*",
        # "models/TabRec/StructEqTable/*",
    ]
    model_dir = snapshot_download(
        "opendatalab/PDF-Extract-Kit-1.0",
        allow_patterns=mineru_patterns,
        local_dir="/opt/",
    )

    # Only the JSON and safetensors files of the layoutreader repository
    # are requested.
    layoutreader_pattern = [
        "*.json",
        "*.safetensors",
    ]
    layoutreader_model_dir = snapshot_download(
        "hantian/layoutreader",
        allow_patterns=layoutreader_pattern,
        local_dir="/opt/layoutreader/",
    )

    # Every enabled pattern above lives under "models/", so report that
    # sub-directory. os.path.join avoids a doubled separator if the returned
    # root ever ends with a slash.
    model_dir = os.path.join(model_dir, "models")
    print(f"model_dir is: {model_dir}")
    print(f"layoutreader_model_dir is: {layoutreader_model_dir}")