Commit a296ea41 authored by myhloli
Browse files

refactor(magic_pdf): optimize environment setup and dependencies

- Add environment variables to disable albumentations and yolo updates
- Import torchtext and disable deprecation warnings
- Update unimernet to 0.2.2
- Specify ultralytics version as >=8.3.48
- Remove upper version limit for torch
parent 5c3bf21e
import os
import time
import fitz
import numpy as np
from loguru import logger
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'  # stop albumentations from checking for updates
os.environ['YOLO_VERBOSE'] = 'False'  # disable yolo logger

# torchtext is an optional dependency: when it is not installed there is
# nothing to silence, so the ImportError is deliberately ignored.
try:
    import torchtext
except ImportError:
    pass
else:
    # Detect the helper by presence instead of comparing ``__version__``
    # strings. String comparison is lexicographic ('0.9.0' >= '0.18.0' is
    # True), so a version check would call the helper on releases that do
    # not define it and raise an AttributeError that is not caught here.
    if hasattr(torchtext, 'disable_torchtext_deprecation_warning'):
        torchtext.disable_torchtext_deprecation_warning()
import magic_pdf.model as model_config
from magic_pdf.data.dataset import Dataset
from magic_pdf.libs.clean_memory import clean_memory
......
......@@ -7,9 +7,9 @@ numpy>=1.21.6,<2.0.0
fast-langdetect==0.2.0
scikit-learn>=1.0.2
pdfminer.six==20231228
unimernet==0.2.1
unimernet==0.2.2
matplotlib
ultralytics
ultralytics>=8.3.48
paddleocr==2.7.3
paddlepaddle==3.0.0b1
struct-eqtable==0.3.2
......
......@@ -7,7 +7,7 @@ numpy>=1.21.6,<2.0.0
pydantic>=2.7.2,<2.8.0
PyMuPDF>=1.24.9
scikit-learn>=1.0.2
torch>=2.2.2,<=2.3.1
torch>=2.2.2
transformers
# pdfminer.six==20231228
# The requirements.txt must ensure that only necessary external dependencies are introduced. If there are new dependencies to add, please contact the project administrator.
......@@ -36,10 +36,10 @@ if __name__ == '__main__':
"paddlepaddle==3.0.0b1;platform_system=='Linux'",
"paddlepaddle==2.6.1;platform_system=='Windows' or platform_system=='Darwin'",
],
"full": ["unimernet==0.2.1", # unimernet升级0.2.1
"full": ["unimernet==0.2.2", # unimernet升级0.2.1
"matplotlib<=3.9.0;platform_system=='Windows'", # 3.9.1及之后不提供windows的预编译包,避免一些没有编译环境的windows设备安装失败
"matplotlib;platform_system=='Linux' or platform_system=='Darwin'", # linux 和 macos 不应限制matplotlib的最高版本,以避免无法更新导致的一些bug
"ultralytics>=8.3.47", # yolov8,公式检测
"ultralytics>=8.3.48", # yolov8,公式检测
"paddleocr==2.7.3", # 2.8.0及2.8.1版本与detectron2有冲突,需锁定2.7.3
"paddlepaddle==3.0.0b1;platform_system=='Linux'", # 解决linux的段异常问题
"paddlepaddle==2.6.1;platform_system=='Windows' or platform_system=='Darwin'", # windows版本3.0.0b1效率下降,需锁定2.6.1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment