Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
c734f4de
Unverified
Commit
c734f4de
authored
Feb 14, 2025
by
shniubobo
Browse files
refactor(web_api): Optimize `Dockerfile`
parent
f559fd9c
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
122 additions
and
59 deletions
+122
-59
projects/web_api/Dockerfile
projects/web_api/Dockerfile
+54
-49
projects/web_api/download_models.py
projects/web_api/download_models.py
+32
-0
projects/web_api/entrypoint.sh
projects/web_api/entrypoint.sh
+5
-0
projects/web_api/magic-pdf.json
projects/web_api/magic-pdf.json
+24
-3
projects/web_api/requirements.txt
projects/web_api/requirements.txt
+7
-0
projects/web_api/start_mineru.sh
projects/web_api/start_mineru.sh
+0
-7
No files found.
projects/web_api/Dockerfile
View file @
c734f4de
# Use the official Ubuntu base image
FROM
ubuntu:22.04
FROM
python:3.10-slim-bookworm AS base
# Set environment variables to non-interactive to avoid prompts during installation
ENV
DEBIAN_FRONTEND=noninteractive
ENV
LANG C.UTF-8
WORKDIR
/app
ENV
DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1 \
PIP_NO_CACHE_DIR=1
FROM
base AS build
# Update the package list and install necessary packages
RUN
apt-get
-q
update
&&
\
apt-get
-q
install
-y
--no-install-recommends
\
build-essential
\
software-properties-common
\
# gpg
\
# && add-apt-repository ppa:deadsnakes/ppa
\
&&
apt-get update
\
&&
apt-get
install
-y
\
python3.10
\
python3.10-venv
\
python3.10-distutils
\
python3-pip
\
wget
\
git
\
libgl1
\
libglib2.0-0
\
&&
apt-get clean
\
&&
rm
-rf
/var/lib/apt/lists/
*
# Set Python 3.10 as the default python3
RUN
update-alternatives
--install
/usr/bin/python3 python3 /usr/bin/python3.10 1
# Create a virtual environment for MinerU and install packages
RUN
python3
-m
venv /opt/mineru_venv
&&
\
pip config
set
global.index-url https://mirrors.aliyun.com/pypi/simple
&&
\
/bin/bash
-c
"source /opt/mineru_venv/bin/activate &&
\
pip install --upgrade pip &&
\
pip install magic-pdf[full] --extra-index-url https://myhloli.github.io/wheels/ --no-cache-dir &&
\
pip install fastapi uvicorn python-multipart --no-cache-dir &&
\
pip uninstall paddlepaddle -y &&
\
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ --no-cache-dir"
# Copy the configuration file template and set up the model directory
COPY
models/models /opt/models
COPY
layoutreader /opt/layoutreader
COPY
.paddleocr /root/.paddleocr
COPY
app.py /root/app.py
COPY
magic-pdf.json /root/magic-pdf.json
RUN
apt-get update
&&
\
apt-get
install
-y
--no-install-recommends
\
build-essential
&&
\
apt-get clean
&&
\
rm
-rf
/var/lib/apt/lists/
*
# Build Python dependencies
COPY
requirements.txt .
RUN
python
-m
venv /app/venv
&&
\
.
/app/venv/bin/activate
&&
\
pip
install
-r
requirements.txt
&&
\
pip uninstall
-y
paddlepaddle
&&
\
pip
install
-i
https://www.paddlepaddle.org.cn/packages/stable/cu118/
\
paddlepaddle-gpu
==
3.0.0rc1
WORKDIR
/root
# Download models
COPY
download_models.py .
RUN
.
/app/venv/bin/activate
&&
\
./download_models.py
# Create the models directory
# RUN mkdir -p /opt/models
FROM
base AS prod
# Set the entry point to activate the virtual environment and run the command line tool
# ENTRYPOINT ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && exec \"$@\" && python3 app.py", "--"]
# Copy Python dependencies and models from the build stage
COPY
--from=build /app/venv /app/venv
COPY
--from=build /opt/models /opt/models
COPY
--from=build /opt/layoutreader /opt/layoutreader
# Update the package list and install necessary packages
RUN
apt-get update
&&
\
apt-get
install
-y
--no-install-recommends
\
libgl1
\
libglib2.0-0
\
libgomp1
&&
\
apt-get clean
&&
\
rm
-rf
/var/lib/apt/lists/
*
# Create volume for paddleocr models
RUN
mkdir
-p
/root/.paddleocr
VOLUME
[ "/root/.paddleocr" ]
# Copy the app and its configuration file
COPY
entrypoint.sh /app/entrypoint.sh
COPY
magic-pdf.json /root/magic-pdf.json
COPY
app.py /app/app.py
# Expose the port that FastAPI will run on
EXPOSE
8000
# Command to run FastAPI using Uvicorn, pointing to app.py and binding to 0.0.0.0:8000
CMD
["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && uvicorn app:app --host 0.0.0.0 --port 8000"]
\ No newline at end of file
ENTRYPOINT
[ "/app/entrypoint.sh" ]
CMD
["--host", "0.0.0.0", "--port", "8000"]
projects/web_api/download_models.py
0 → 100755
View file @
c734f4de
#!/usr/bin/env python
"""Download the model files used by the web API from the Hugging Face Hub.

When executed as a script this fetches:

* the selected model folders of ``opendatalab/PDF-Extract-Kit-1.0`` into
  ``/opt/``, and
* the ``*.json`` / ``*.safetensors`` files of ``hantian/layoutreader`` into
  ``/opt/layoutreader/``,

and then prints the two resulting directories.
"""
from huggingface_hub import snapshot_download


def main():
    """Download both model sets and report where they were stored."""
    # Only these sub-folders of the PDF-Extract-Kit repository are downloaded.
    pdf_extract_kit_files = [
        "models/Layout/LayoutLMv3/*",
        "models/Layout/YOLO/*",
        "models/MFD/YOLO/*",
        "models/MFR/unimernet_small_2501/*",
        "models/TabRec/TableMaster/*",
        "models/TabRec/StructEqTable/*",
    ]
    kit_root = snapshot_download(
        "opendatalab/PDF-Extract-Kit-1.0",
        allow_patterns=pdf_extract_kit_files,
        local_dir="/opt/",
    )

    # For layoutreader only the JSON and safetensors files are fetched.
    layoutreader_files = ["*.json", "*.safetensors"]
    layoutreader_root = snapshot_download(
        "hantian/layoutreader",
        allow_patterns=layoutreader_files,
        local_dir="/opt/layoutreader/",
    )

    # The allow patterns above all live under "models/", so the model
    # directory is that sub-folder of the download root.
    models_root = kit_root + "/models"

    print(f"model_dir is: {models_root}")
    print(f"layoutreader_model_dir is: {layoutreader_root}")


if __name__ == "__main__":
    main()
projects/web_api/entrypoint.sh
0 → 100755
View file @
c734f4de
#!/usr/bin/env bash
#
# Entrypoint script: activates the virtual environment in /app/venv and then
# replaces this shell with uvicorn serving app:app. Every argument passed to
# this script is forwarded to uvicorn unchanged.

# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Activate the virtual environment in the current shell.
# shellcheck source=/dev/null
source /app/venv/bin/activate

# exec replaces the shell process with uvicorn instead of running it as a child.
exec uvicorn app:app "$@"
projects/web_api/magic-pdf.json
View file @
c734f4de
...
...
@@ -7,7 +7,7 @@
"layoutreader-model-dir"
:
"/opt/layoutreader"
,
"device-mode"
:
"cuda"
,
"layout-config"
:
{
"model"
:
"layout
lmv3
"
"model"
:
"
doc
layout
_yolo
"
},
"formula-config"
:
{
"mfd_model"
:
"yolo_v8_mfd"
,
...
...
@@ -16,8 +16,29 @@
},
"table-config"
:
{
"model"
:
"rapid_table"
,
"enable"
:
false
,
"sub_model"
:
"slanet_plus"
,
"enable"
:
true
,
"max_time"
:
400
},
"config_version"
:
"1.0.0"
"llm-aided-config"
:
{
"formula_aided"
:
{
"api_key"
:
"your_api_key"
,
"base_url"
:
"https://dashscope.aliyuncs.com/compatible-mode/v1"
,
"model"
:
"qwen2.5-7b-instruct"
,
"enable"
:
false
},
"text_aided"
:
{
"api_key"
:
"your_api_key"
,
"base_url"
:
"https://dashscope.aliyuncs.com/compatible-mode/v1"
,
"model"
:
"qwen2.5-7b-instruct"
,
"enable"
:
false
},
"title_aided"
:
{
"api_key"
:
"your_api_key"
,
"base_url"
:
"https://dashscope.aliyuncs.com/compatible-mode/v1"
,
"model"
:
"qwen2.5-32b-instruct"
,
"enable"
:
false
}
},
"config_version"
:
"1.1.1"
}
projects/web_api/requirements.txt
0 → 100644
View file @
c734f4de
# Python dependencies for the web API.

# Additional package index, consulted alongside the default one.
--extra-index-url https://myhloli.github.io/wheels/
# magic-pdf with its "full" extra.
magic-pdf[full]
# Web framework and server; python-multipart is presumably needed for
# form/file uploads — confirm against app.py.
fastapi
uvicorn
python-multipart
projects/web_api/start_mineru.sh
deleted
100644 → 0
View file @
f559fd9c
# Command notes for running the MinerU image as a container and for
# publishing the image to the Aliyun (cn-beijing) registry.

# Start a detached container named mineru_server from the 0.1-models image
# with all GPUs, publishing container port 8000 on host port 8888; the
# trailing /bin/bash replaces the image's default command.
docker run
-itd
--name
=
mineru_server
--gpus
=
all
-p
8888:8000 quincyqiang/mineru:0.1-models /bin/bash
# Same invocation for the 0.3-models image, using the image's default command.
# NOTE(review): this reuses the container name mineru_server; Docker will
# presumably reject it while the container above still exists — confirm the
# two commands are meant as alternatives, not to be run in sequence.
docker run
-itd
--name
=
mineru_server
--gpus
=
all
-p
8888:8000 quincyqiang/mineru:0.3-models
# Log in to the Aliyun registry.
# NOTE(review): the account name is hard-coded here.
docker login
--username
=
1185918903@qq.com registry.cn-beijing.aliyuncs.com
# Re-tag the local 0.3-models image under the registry repository and push it.
docker tag quincyqiang/mineru:0.3-models registry.cn-beijing.aliyuncs.com/quincyqiang/gomate:0.3-models
docker push registry.cn-beijing.aliyuncs.com/quincyqiang/gomate:0.3-models
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment