pyproject.toml 3.77 KB
Newer Older
1
2
3
# Build backend, and the packages that must be installed before it can run.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=61.0",
    "wheel",
]
Jin Zhen Jiang's avatar
Jin Zhen Jiang committed
4

5
6
7
[project]
name = "mineru"
description = "A practical tool for converting PDF to Markdown"
readme = "README.md"
license = { text = "AGPL-3.0" }
requires-python = ">=3.10,<3.14"
keywords = [
    "magic-pdf",
    "mineru",
    "MinerU",
    "convert",
    "pdf",
    "markdown",
]
# One classifier per minor version admitted by `requires-python`.
classifiers = [
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
# `version` is not written here; it is supplied by [tool.setuptools.dynamic].
dynamic = ["version"]
# Requirements installed with every install of the package, regardless of
# extras. Kept in alphabetical order.
dependencies = [
    "boto3>=1.28.43",
    "click>=8.1.7",
    "fast-langdetect>=0.2.3,<0.3.0",
    "httpx",
    "huggingface-hub>=0.32.4",
    "json-repair>=0.46.2",
    "loguru>=0.7.2",
    "modelscope>=1.26.0",
    "numpy>=1.21.6",
    "opencv-python>=4.11.0.86",
    # Exact pin: only this release is accepted.
    "pdfminer.six==20250506",
    "pdftext>=0.6.2",
    "pillow>=11.0.0",
    "pypdf>=5.6.0",
    "pypdfium2>=4.30.0",
    "reportlab",
    "requests",
    "tqdm>=4.67.1",
]
Jin Zhen Jiang's avatar
Jin Zhen Jiang committed
39

40
[project.optional-dependencies]
# Test tooling; also pulls in this package's own `core` extra.
test = [
    "mineru[core]",
    "pytest",
    "pytest-cov",
    "coverage",
    "beautifulsoup4",
]
48
49
50
51
# `vlm` extra. Only lower bounds are declared here; no upper caps.
vlm = [
    "accelerate>=1.5.1",
    "pydantic",
    "torch>=2.6.0",
    "transformers>=4.51.1",
]
# `sglang` extra, restricted to the 0.4.8.x releases (>=0.4.8,<0.4.9).
sglang = ["sglang[all]>=0.4.8,<0.4.9"]
# `pipeline` extra. Most entries are capped below their next major version;
# kept in alphabetical order.
pipeline = [
    "dill>=0.3.8,<1",
    "doclayout_yolo==0.0.4",
    "ftfy>=6.3.1,<7",
    "matplotlib>=3.10,<4",
    "omegaconf>=2.3.0,<3",
    "openai>=1.70.0,<2",
    "pyclipper>=1.3.0,<2",
    "PyYAML>=6.0.2,<7",
    "rapid_table>=1.0.5,<2.0.0",
    "shapely>=2.0.7,<3",
    # torch 2.5.0 and 2.5.1 are explicitly excluded.
    "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
    "torchvision",
    # transformers 4.51.0 is explicitly excluded.
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
    "ultralytics>=8.3.48,<9",
]
73
74
75
76
77
78
# `api` extra; all three packages are unpinned.
api = ["fastapi", "python-multipart", "uvicorn"]
# `gradio` extra; gradio itself is held to the 5.x series (>=5.34,<6).
gradio = ["gradio>=5.34,<6", "gradio-pdf>=0.0.22"]
82
# Aggregate extra: the union of the `vlm`, `pipeline`, `api` and `gradio`
# extras of this same package.
core = [
    "mineru[vlm]",
    "mineru[pipeline]",
    "mineru[api]",
    "mineru[gradio]",
]
88
89
90
91
# Aggregate extra: everything in `core` plus the `sglang` extra.
all = ["mineru[core]", "mineru[sglang]"]
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Variant of the `pipeline` extra in which most packages are pinned to an
# exact release or capped at a specific one, and `albumentations` is added.
# NOTE(review): presumably intended for older Linux systems, going by the
# name only; confirm against the installation docs.
pipeline_old_linux = [
    "albumentations==1.4.20",
    "dill==0.3.8",
    "doclayout_yolo==0.0.4",
    "ftfy==6.3.1",
    "matplotlib>=3.10,<=3.10.1",
    "omegaconf==2.3.0",
    "openai==1.71.0",
    "pyclipper==1.3.0.post6",
    "PyYAML==6.0.2",
    "rapid_table==1.0.3",
    "shapely==2.1.0",
    "torch>=2.2.2,!=2.5.0,!=2.5.1,<3",
    "torchvision",
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
    "ultralytics>=8.3.48,<=8.3.104",
]
Jin Zhen Jiang's avatar
Jin Zhen Jiang committed
109

110
111
112
113
114
115
# Links published in the package metadata.
[project.urls]
Repository = "https://github.com/opendatalab/MinerU"
Home = "https://mineru.net/"

# Console commands created at install time. Each value has the form
# `importable.module:callable`.
[project.scripts]
# Points at the `client` module itself, not at an attribute of the
# `mineru.cli` package. That way the entry point resolves even when the
# package's __init__ has not imported `client`, and it matches the
# `module:function` form used by the other commands.
mineru = "mineru.cli.client:main"
mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
mineru-models-download = "mineru.cli.models_download:download_models"
mineru-api = "mineru.cli.fast_api:main"
mineru-gradio = "mineru.cli.gradio_app:main"
120
121

# Supplies the `version` field that [project] declares as dynamic: it is read
# from the `__version__` attribute of `mineru.version`.
[tool.setuptools.dynamic]
version.attr = "mineru.version.__version__"
123
124

# Package discovery: only packages whose name matches `mineru*` are
# collected, with namespace-package discovery switched off.
[tool.setuptools.packages.find]
namespaces = false
include = ["mineru*"]

# Non-Python files to ship, keyed by package name. The second key must stay
# quoted because it contains dots and is a single package name, not a path.
[tool.setuptools.package-data]
mineru = ["resources/**"]
"mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils" = ["resources/**"]
131
132
133
134

# General setuptools switches.
[tool.setuptools]
zip-safe = false
include-package-data = true
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165

# Arguments added to every pytest run: no output capture (-s), plus coverage
# of the `mineru` package written as an HTML report.
[tool.pytest.ini_options]
addopts = ["-s", "--cov=mineru", "--cov-report", "html"]

# Settings for `coverage run`.
[tool.coverage.run]
source = ["mineru/"]
command_line = "-m pytest tests/unittest/test_e2e.py"
# File patterns left out of measurement; kept in alphabetical order.
omit = [
    "*/cli/client.py",
    "*/cli_parser.py",
    "*/fast_api.py",
    "*/gradio_app.py",
    "*/models_download.py",
    "*/run_async.py",
    "*/sglang_engine_predictor.py",
    "*/vlm_sglang_model/*",
    "*/vlm_sglang_server.py",
]
# Regexes for source lines to leave out of coverage reporting, on top of
# coverage.py's defaults. `exclude_also` is a [report] option; under [html]
# it is not recognized and the patterns are not applied.
[tool.coverage.report]
exclude_also = [
    'def __repr__',
    'if self.debug:',
    'if settings.DEBUG',
    'raise AssertionError',
    'raise NotImplementedError',
    'if 0:',
    'if __name__ == .__main__.:',
    'if TYPE_CHECKING:',
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]

# Output directory for the HTML report.
[tool.coverage.html]
directory = "htmlcov"