pyproject.toml 3.75 KB
Newer Older
1
2
3
[build-system]
# PEP 517 backend used to produce the sdist and wheel.
build-backend = "setuptools.build_meta"
# Packages that must be present in the isolated build environment.
requires = [
    "setuptools>=61.0",
    "wheel",
]
Jin Zhen Jiang's avatar
Jin Zhen Jiang committed
4

5
6
7
[project]
name = "mineru"
# The version is not hard-coded here; setuptools reads it at build time
# from the attribute configured under [tool.setuptools.dynamic].
dynamic = ["version"]
license = { text = "AGPL-3.0" }
description = "A practical tool for converting PDF to Markdown"
readme = "README.md"
# Keep this range in sync with the Python version classifiers below.
requires-python = ">=3.10,<3.14"
keywords = ["magic-pdf", "mineru", "MinerU", "convert", "pdf", "markdown"]
classifiers = [
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
# Requirements installed with the base package. Feature-specific packages
# are declared as extras under [project.optional-dependencies].
dependencies = [
    "boto3>=1.28.43",
    "click>=8.1.7",
    "loguru>=0.7.2",
    "numpy>=1.21.6",
    "pdfminer.six==20250506",
    "tqdm>=4.67.1",
    "requests",
    "httpx",
    "pillow>=11.0.0",
    "pypdfium2>=4.30.0",
    "pypdf>=5.6.0",
    "reportlab",
    "pdftext>=0.6.2",
    "modelscope>=1.26.0",
    "huggingface-hub>=0.32.4",
    "json-repair>=0.46.2",
    "opencv-python>=4.11.0.86",
    "fast-langdetect>=0.2.3,<0.3.0",
]
Jin Zhen Jiang's avatar
Jin Zhen Jiang committed
39

40
[project.optional-dependencies]
# Extras are installed with `pip install "mineru[<extra>]"`. Several extras
# reference other extras of this same package via "mineru[...]".

# Test tooling, layered on top of the full `core` feature set.
test = [
    "mineru[core]",
    "pytest",
    "pytest-cov",
    "coverage",
    "beautifulsoup4",
    "fuzzywuzzy",
]
vlm = [
    "transformers>=4.51.1",
    "accelerate>=1.5.1",
    "pydantic",
]
pipeline = [
    "matplotlib>=3.10,<4",
    "ultralytics>=8.3.48,<9",
    "doclayout_yolo==0.0.4",
    "dill>=0.3.8,<1",
    "rapid_table>=1.0.5,<2.0.0",
    "PyYAML>=6.0.2,<7",
    "ftfy>=6.3.1,<7",
    "openai>=1.70.0,<2",
    "shapely>=2.0.7,<3",
    "pyclipper>=1.3.0,<2",
    "omegaconf>=2.3.0,<3",
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
]
api = [
    "fastapi",
    "python-multipart",
    "uvicorn",
]
gradio = [
    "gradio>=5.34,<6",
    "gradio-pdf>=0.0.22",
]
# Aggregate of the four feature extras above.
core = [
    "mineru[vlm]",
    "mineru[pipeline]",
    "mineru[api]",
    "mineru[gradio]",
]
# NOTE(review): no `sglang` extra is declared in this table as visible here.
# If it is not defined elsewhere, installers only warn about the unknown
# extra and install nothing for it -- confirm whether an `sglang` extra
# should be added or this reference dropped.
all = [
    "mineru[core]",
    "mineru[sglang]",
]
# Variant of `pipeline` with most requirements pinned to exact versions,
# plus an explicit albumentations pin. Presumably intended for older Linux
# environments -- confirm against the installation docs.
pipeline_old_linux = [
    "matplotlib>=3.10,<=3.10.1",
    "ultralytics>=8.3.48,<=8.3.104",
    "doclayout_yolo==0.0.4",
    "dill==0.3.8",
    "PyYAML==6.0.2",
    "ftfy==6.3.1",
    "openai==1.71.0",
    "shapely==2.1.0",
    "pyclipper==1.3.0.post6",
    "omegaconf==2.3.0",
    "albumentations==1.4.20",
    "rapid_table==1.0.3",
    "transformers>=4.49.0,!=4.51.0,<5.0.0",
]
Jin Zhen Jiang's avatar
Jin Zhen Jiang committed
102

103
[project.urls]
# Links published with the package metadata.
homepage = "https://mineru.net/"
documentation = "https://opendatalab.github.io/MinerU/"
repository = "https://github.com/opendatalab/MinerU"
issues = "https://github.com/opendatalab/MinerU/issues"
108
109
110

[project.scripts]
# Console commands created at install time. Each value is an entry-point
# object reference of the form "importable.module:callable".
#
# `mineru` names the `client` submodule on the module side of the colon,
# like the other entries. The previous form "mineru.cli:client.main" only
# resolves through importlib.metadata's EntryPoint.load() when the
# `mineru.cli` package itself imports `client`.
mineru = "mineru.cli.client:main"
mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
mineru-models-download = "mineru.cli.models_download:download_models"
mineru-api = "mineru.cli.fast_api:main"
mineru-gradio = "mineru.cli.gradio_app:main"
115
116

[tool.setuptools.dynamic]
# Supplies the `version` field that [project] declares as dynamic, read from
# the `__version__` attribute of the `mineru.version` module.
version = { attr = "mineru.version.__version__" }
118
119

[tool.setuptools.packages.find]
# Package discovery: only collect `mineru` and packages whose dotted name
# starts with it.
include = ["mineru*"]
# Do not treat directories without an __init__.py as namespace packages.
namespaces = false

[tool.setuptools.package-data]
# Non-Python files to ship, keyed by package name; each pattern is relative
# to that package's directory. The second key is quoted because it contains
# dots, which would otherwise be parsed as nested tables.
"mineru" = ["resources/**"]
"mineru.model.ocr.paddleocr2pytorch.pytorchocr.utils" = ["resources/**"]
126
127
128
129

[tool.setuptools]
# Install as a regular directory tree, never as a zipped egg.
zip-safe = false
# Let setuptools include data files found inside the packages.
include-package-data = true
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

[tool.pytest.ini_options]
# Flags added to every pytest invocation: `-s` disables output capturing,
# and the `--cov*` options (provided by pytest-cov, listed in the `test`
# extra) measure coverage of the `mineru` package and write an HTML report.
addopts = "-s --cov=mineru --cov-report html"

[tool.coverage.run]
# Code whose execution is measured.
source = ["mineru/"]
# Arguments used when `coverage run` is invoked with no arguments of its own.
command_line = "-m pytest tests/unittest/test_e2e.py"
# File patterns left out of measurement.
omit = [
    "*/vlm_sglang_model/*",
    "*/gradio_app.py",
    "*/models_download.py",
    "*/fast_api.py",
    "*/cli/client.py",
    "*/sglang_engine_predictor.py",
    "*/vlm_sglang_server.py",
    "*/cli_parser.py",
    "*/run_async.py",
]
Sidney233's avatar
Sidney233 committed
148

149
[tool.coverage.html]
# Output directory for the HTML coverage report.
directory = "htmlcov"

[tool.coverage.report]
# Regular expressions; source lines matching any of them are excluded from
# the coverage report, in addition to coverage.py's default exclusions.
# Literal (single-quoted) strings keep the regex backslashes unescaped.
exclude_also = [
    'def __repr__',
    'if self.debug:',
    'if settings.DEBUG',
    'raise AssertionError',
    'raise NotImplementedError',
    'if 0:',
    'if __name__ == .__main__.:',
    'if TYPE_CHECKING:',
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]