# pyproject.toml - packaging metadata and tool configuration
# Build requirements and backend: the package is built with setuptools.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

# Core package metadata.
[project]
name = "mineru"
# The version is not written here; it is resolved at build time from the
# attribute configured under [tool.setuptools.dynamic].
dynamic = ["version"]
license = { text = "AGPL-3.0" }
description = "A practical tool for converting PDF to Markdown"
readme = "README.md"
# Keep the upper bound in step with the classifiers listed below.
requires-python = ">=3.10,<3.14"
keywords = ["magic-pdf", "mineru", "MinerU", "convert", "pdf", "markdown"]
classifiers = [
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
# Always-installed runtime requirements; heavier feature-specific packages
# live under [project.optional-dependencies].
dependencies = [
    "boto3>=1.28.43",
    "click>=8.1.7",
    "loguru>=0.7.2",
    "numpy>=1.21.6",
    # Pinned to one exact release.
    "pdfminer.six==20250506",
    "tqdm>=4.67.1",
    "requests",
    "httpx",
    "pillow>=11.0.0",
    "pypdfium2>=4.30.0",
    "pypdf>=5.6.0",
    "reportlab",
    "pdftext>=0.6.2",
    "modelscope>=1.26.0",
    "huggingface-hub>=0.32.4",
    "json-repair>=0.46.2",
    "opencv-python>=4.11.0.86",
    "fast-langdetect>=0.2.3,<0.3.0",
]

# Optional feature sets, installed as extras, e.g. `pip install "mineru[core]"`.
# Several extras reference other extras of this same package.
[project.optional-dependencies]
# Test tooling layered on top of the `core` extra.
test = [
    "mineru[core]",
    "pytest",
    "pytest-cov",
    "beautifulsoup4",
]
vlm = [
    "transformers>=4.51.1",
    "torch>=2.6.0",
    "accelerate>=1.5.1",
    "pydantic",
]
# Restricted to the 0.4.8.x series of sglang.
sglang = [
    "sglang[all]>=0.4.8,<0.4.9",
]
pipeline = [
    "matplotlib>=3.10,<4",
    "ultralytics>=8.3.48,<9",
    "doclayout_yolo==0.0.4",
    "dill>=0.3.8,<1",
    "rapid_table>=1.0.5,<2.0.0",
    "PyYAML>=6.0.2,<7",
    "ftfy>=6.3.1,<7",
    "openai>=1.70.0,<2",
    "shapely>=2.0.7,<3",
    "pyclipper>=1.3.0,<2",
    "omegaconf>=2.3.0,<3",
    # torch 2.5.0 and 2.5.1 are explicitly excluded.
    "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
    "torchvision",
    # transformers 4.51.0 is explicitly excluded.
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
]
api = [
    "fastapi",
    "python-multipart",
    "uvicorn",
]
gradio = [
    "gradio>=5.34,<6",
    "gradio-pdf>=0.0.22",
]
# Aggregate extra: vlm + pipeline + api + gradio.
core = [
    "mineru[vlm]",
    "mineru[pipeline]",
    "mineru[api]",
    "mineru[gradio]",
]
# Aggregate extra: core + sglang.
all = [
    "mineru[core]",
    "mineru[sglang]",
]
# Same package set as `pipeline`, but most entries are pinned to an exact
# version or capped at a specific release, with `albumentations` added.
# NOTE(review): presumably intended for older Linux systems, going by the
# name - confirm.
pipeline_old_linux = [
    "matplotlib>=3.10,<=3.10.1",
    "ultralytics>=8.3.48,<=8.3.104",
    "doclayout_yolo==0.0.4",
    "dill==0.3.8",
    "PyYAML==6.0.2",
    "ftfy==6.3.1",
    "openai==1.71.0",
    "shapely==2.1.0",
    "pyclipper==1.3.0.post6",
    "omegaconf==2.3.0",
    "albumentations==1.4.20",
    "rapid_table==1.0.3",
    "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
    "torchvision",
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
]

# Project links published in the package metadata.
[project.urls]
Home = "https://mineru.net/"
Repository = "https://github.com/opendatalab/MinerU"

# Console commands installed with the package, each given as "module:callable".
[project.scripts]
# References the client module directly, like the other entries, so the entry
# point resolves without `client` having to be reachable as an attribute of
# the `mineru.cli` package.
mineru = "mineru.cli.client:main"
mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
mineru-models-download = "mineru.cli.models_download:download_models"
mineru-api = "mineru.cli.fast_api:main"
mineru-gradio = "mineru.cli.gradio_app:main"

# Source of the `version` field that [project] declares as dynamic.
[tool.setuptools.dynamic]
version = { attr = "mineru.version.__version__" }

# Package discovery: only packages whose name matches `mineru*` are included.
[tool.setuptools.packages.find]
include = ["mineru*"]
# Do not pick up namespace packages (directories without __init__.py).
namespaces = false

# Non-Python files shipped inside the listed packages. Each key is a full
# package name, so the dotted one must stay quoted to remain a single key.
[tool.setuptools.package-data]
"mineru" = ["resources/**"]
"mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils" = ["resources/**"]

# General setuptools switches.
[tool.setuptools]
include-package-data = true
zip-safe = false

# Default pytest flags: `-s` disables output capture; the `--cov` options
# (pytest-cov) measure coverage of `mineru` and produce an HTML report.
[tool.pytest.ini_options]
addopts = "-s --cov=mineru --cov-report html"

# coverage.py measurement settings.
[tool.coverage.run]
# Arguments used when `coverage run` is invoked without an explicit command.
command_line = "-m pytest tests/unittest/test_e2e.py"
source = ["mineru/"]
# File patterns left out of coverage measurement.
omit = [
    "*/vlm_sglang_model/*",
    "*/gradio_app.py",
    "*/models_download.py",
    "*/fast_api.py",
    "*/cli/client.py",
    "*/sglang_engine_predictor.py",
    "*/vlm_sglang_server.py",
    "*/cli_parser.py",
    "*/run_async.py"
]
# Reporting settings. `exclude_also` is a [report] option in coverage.py; it
# is not recognised under [html], so the patterns must live here to apply.
[tool.coverage.report]
# Regexes (literal strings, no escaping needed); lines matching any of them
# are excluded from coverage, in addition to coverage.py's default exclusions.
exclude_also = [
    'def __repr__',
    'if self.debug:',
    'if settings.DEBUG',
    'raise AssertionError',
    'raise NotImplementedError',
    'if 0:',
    'if __name__ == .__main__.:',
    'if TYPE_CHECKING:',
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]

# HTML report output location.
[tool.coverage.html]
# NOTE(review): "htmelcov" looks like a typo for "htmlcov"; the value is kept
# unchanged because CI or other tooling may read this exact path - confirm.
directory = "tests/htmelcov"