Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
MinerU
Commits
dcc803e7
Commit
dcc803e7
authored
Nov 20, 2024
by
chenpangpang
Browse files
feat: duc初始提交
parent
99eff16d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
10 deletions
+5
-10
Dockerfile
Dockerfile
+3
-3
MinerU/app.py
MinerU/app.py
+2
-2
MinerU/requirements.txt
MinerU/requirements.txt
+0
-5
No files found.
Dockerfile
View file @
dcc803e7
- FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.3.1-py3.10-cuda11.8-ubuntu22.04-devel as base
+ FROM image.sourcefind.cn:5000/dcu/admin/base/jupyterlab-pytorch:2.3.0-ubuntu22.04-dtk24.04.2-py3.10-devel as base
ARG IMAGE=mineru
ARG IMAGE_UPPER=MinerU
- ARG BRANCH=gpu
+ ARG BRANCH=dcu
RUN cd /root && git clone -b $BRANCH http://developer.hpccube.com/codes/chenpangpang/$IMAGE.git
WORKDIR /root/$IMAGE/$IMAGE_UPPER
RUN pip install -r requirements.txt
...
...
@@ -10,7 +10,7 @@ RUN pip install -r requirements.txt
#########
# Prod #
#########
- FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.3.1-py3.10-cuda11.8-ubuntu22.04-devel
+ FROM image.sourcefind.cn:5000/dcu/admin/base/jupyterlab-pytorch:2.3.0-ubuntu22.04-dtk24.04.2-py3.10-devel
ARG IMAGE=mineru
ARG IMAGE_UPPER=MinerU
COPY chenyh/$IMAGE/frpc_linux_amd64_* /opt/conda/lib/python3.10/dist-packages/gradio/
...
...
MinerU/app.py
View file @
dcc803e7
...
...
@@ -15,9 +15,9 @@ os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
os.system('wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py -O download_models_hf.py')
os.system('python download_models_hf.py')
os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
- os.system("sed -i 's|tablemaster|rapid_table|g' /home/user/magic-pdf.json")
+ os.system("sed -i 's|tablemaster|rapid_table|g' /root/magic-pdf.json")
- os.system('cp -r paddleocr /home/user/.paddleocr')
+ os.system('cp -r paddleocr /root/.paddleocr')
os.system("pip install gradio-pdf==0.0.17")
from gradio_pdf import PDF
...
...
MinerU/requirements.txt
View file @
dcc803e7
boto3>=1.28.43
Brotli>=1.1.0
click>=8.1.7
PyMuPDF>=1.24.9
loguru>=0.6.0
numpy>=1.21.6,<2.0.0
fast-langdetect==0.2.0
scikit-learn>=1.0.2
pdfminer.six==20231228
unimernet==0.2.1
doclayout_yolo==0.0.2
matplotlib
ultralytics
paddleocr==2.7.3
paddlepaddle-gpu @ https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0b1-cp310-cp310-linux_x86_64.whl
pypandoc
struct-eqtable==0.3.2
detectron2 @ https://wheels-1251341229.cos.ap-shanghai.myqcloud.com/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment