Commit 6e35e382 authored by myhloli
Browse files

feat(models): add PaddleOCR model and update download scripts

- Add PaddleOCR model to the list of models to download
- Update download_models.py and download_models_hf.py scripts to include PaddleOCR
- Create user directory for PaddleOCR and copy downloaded models
parent 2f3b66a5
import json import json
import shutil
import os import os
import requests import requests
...@@ -36,6 +37,7 @@ if __name__ == '__main__': ...@@ -36,6 +37,7 @@ if __name__ == '__main__':
"models/Layout/YOLO/*", "models/Layout/YOLO/*",
"models/MFD/YOLO/*", "models/MFD/YOLO/*",
"models/MFR/unimernet_hf_small_2503/*", "models/MFR/unimernet_hf_small_2503/*",
"models/OCR/paddleocr/*",
# "models/TabRec/TableMaster/*", # "models/TabRec/TableMaster/*",
# "models/TabRec/StructEqTable/*", # "models/TabRec/StructEqTable/*",
] ]
...@@ -45,6 +47,12 @@ if __name__ == '__main__': ...@@ -45,6 +47,12 @@ if __name__ == '__main__':
print(f'model_dir is: {model_dir}') print(f'model_dir is: {model_dir}')
print(f'layoutreader_model_dir is: {layoutreader_model_dir}') print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
paddleocr_model_dir = model_dir + '/OCR/paddleocr'
user_paddleocr_dir = os.path.expanduser('~/.paddleocr')
if os.path.exists(user_paddleocr_dir):
shutil.rmtree(user_paddleocr_dir)
shutil.copytree(paddleocr_model_dir, user_paddleocr_dir)
json_url = 'https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json' json_url = 'https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json'
config_file_name = 'magic-pdf.json' config_file_name = 'magic-pdf.json'
home_dir = os.path.expanduser('~') home_dir = os.path.expanduser('~')
......
import json import json
import os import os
import shutil
import requests import requests
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
...@@ -37,6 +38,7 @@ if __name__ == '__main__': ...@@ -37,6 +38,7 @@ if __name__ == '__main__':
"models/Layout/YOLO/*", "models/Layout/YOLO/*",
"models/MFD/YOLO/*", "models/MFD/YOLO/*",
"models/MFR/unimernet_hf_small_2503/*", "models/MFR/unimernet_hf_small_2503/*",
"models/OCR/paddleocr/*",
# "models/TabRec/TableMaster/*", # "models/TabRec/TableMaster/*",
# "models/TabRec/StructEqTable/*", # "models/TabRec/StructEqTable/*",
] ]
...@@ -52,6 +54,12 @@ if __name__ == '__main__': ...@@ -52,6 +54,12 @@ if __name__ == '__main__':
print(f'model_dir is: {model_dir}') print(f'model_dir is: {model_dir}')
print(f'layoutreader_model_dir is: {layoutreader_model_dir}') print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
paddleocr_model_dir = model_dir + '/OCR/paddleocr'
user_paddleocr_dir = os.path.expanduser('~/.paddleocr')
if os.path.exists(user_paddleocr_dir):
shutil.rmtree(user_paddleocr_dir)
shutil.copytree(paddleocr_model_dir, user_paddleocr_dir)
json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json' json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
config_file_name = 'magic-pdf.json' config_file_name = 'magic-pdf.json'
home_dir = os.path.expanduser('~') home_dir = os.path.expanduser('~')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment