setup.py_back 4.91 KB
Newer Older
赵小蒙's avatar
赵小蒙 committed
1
from pathlib import Path
赵小蒙's avatar
赵小蒙 committed
2
from setuptools import setup, find_packages
3
from mineru.version import __version__
赵小蒙's avatar
赵小蒙 committed
4
5


赵小蒙's avatar
赵小蒙 committed
6
if __name__ == '__main__':
    # Packaging entry point: read the README that sits next to this file and
    # hand the project metadata, dependencies and console scripts to setuptools.
    long_description = (Path(__file__).parent / 'README.md').read_text(encoding='utf-8')

    setup(
        name="mineru",  # project name
        version=__version__,  # version string imported from mineru.version
        license="AGPL-3.0",
        # All discovered packages, plus the resource packages listed explicitly
        # (presumably because they are data-only directories that
        # find_packages() would not pick up -- TODO confirm).
        packages=find_packages()
        + ["mineru.resources"]
        + ["mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils.resources"],
        package_data={
            # ship every file under mineru.resources
            "mineru.resources": ["**"],
            # ship every file under the pytorchocr utils resources package
            "mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils.resources": ["**"],
        },
        # Third-party libraries required by every installation.
        install_requires=[
            "boto3>=1.28.43",
            "click>=8.1.7",
            "loguru>=0.6.0",
            "numpy>=1.21.6",
            "pdfminer.six==20250506",
            "tqdm>=4.67.1",
            "requests",
            "httpx",
            "pillow",
            "pypdfium2",
            "pypdf",
            "reportlab",
        ],
        extras_require={
            "vlm": [
                "transformers>=4.51.1",
                "torch>=2.6.0",
                # The comma after this entry matters: without it Python joins
                # the two adjacent literals into one malformed requirement.
                "accelerate>=1.5.1",
                "pydantic>=2.7.2,<2.11",
            ],
            "sglang": [
                "sglang[all]==0.4.6.post5",
            ],
            "pipeline": [
                "matplotlib>=3.10,<4",
                "ultralytics>=8.3.48,<9",  # yolov8, formula detection
                "doclayout_yolo==0.0.4",  # doclayout_yolo
                "dill>=0.3.8,<1",  # doclayout_yolo
                "rapid_table>=1.0.5,<2.0.0",  # rapid_table
                "PyYAML>=6.0.2,<7",  # yaml
                "ftfy>=6.3.1,<7",  # unimernet_hf
                "openai>=1.70.0,<2",  # openai SDK
                "shapely>=2.0.7,<3",  # imgaug-paddleocr2pytorch
                "pyclipper>=1.3.0,<2",  # paddleocr2pytorch
                "omegaconf>=2.3.0,<3",  # paddleocr2pytorch
                "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
                "torchvision",
                "transformers>=4.49.0,!=4.51.0,<5.0.0",
                "fast-langdetect>=0.2.3,<0.3.0",
            ],
            # Same feature set as "pipeline", with versions pinned for older
            # Linux systems (see the per-line notes below).
            "pipeline_old_linux": [
                "matplotlib>=3.10,<=3.10.1",
                "ultralytics>=8.3.48,<=8.3.104",  # yolov8, formula detection
                "doclayout_yolo==0.0.4",  # doclayout_yolo
                "dill==0.3.8",  # doclayout_yolo
                "PyYAML==6.0.2",  # yaml
                "ftfy==6.3.1",  # unimernet_hf
                "openai==1.71.0",  # openai SDK
                "shapely==2.1.0",  # imgaug-paddleocr2pytorch
                "pyclipper==1.3.0.post6",  # paddleocr2pytorch
                "omegaconf==2.3.0",  # paddleocr2pytorch
                # simsimd, introduced in 1.4.21, does not support Linux
                # systems from 2019 or earlier
                "albumentations==1.4.20",
                # newer rapid_table releases depend on an onnxruntime that does
                # not support Linux systems from 2019 or earlier
                "rapid_table==1.0.3",
                "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
                "torchvision",
                "transformers>=4.49.0,!=4.51.0,<5.0.0",
                "fast-langdetect>=0.2.3,<0.3.0",
            ],
        },
        description="A practical tool for converting PDF to Markdown",  # short summary
        long_description=long_description,  # full description (README contents)
        long_description_content_type="text/markdown",  # README is Markdown
        project_urls={
            "Home": "https://mineru.net/",
            "Repository": "https://github.com/opendatalab/MinerU",
        },
        # One list entry per keyword (previously a single comma-joined string).
        keywords=["magic-pdf", "mineru", "MinerU", "convert", "pdf", "markdown"],
        classifiers=[
            "Programming Language :: Python :: 3.10",
            "Programming Language :: Python :: 3.11",
            "Programming Language :: Python :: 3.12",
            "Programming Language :: Python :: 3.13",
        ],
        python_requires=">=3.10,<3.14",  # supported Python versions
        # Executable commands installed with the project.
        entry_points={
            "console_scripts": [
                # CLI entry point: the `mineru` command calls mineru.cli.client.main
                "mineru = mineru.cli:client.main",
                # sglang server entry point
                "mineru-sglang-server = mineru.cli.vlm_sglang_server:main",
                # model download entry point
                "mineru-models-download = mineru.cli.models_download:download_models",
            ],
        },
        include_package_data=True,  # include non-code files (data, configuration, ...)
        zip_safe=False,  # do not install as a zipped archive
    )