Unverified Commit c734f4de authored by shniubobo's avatar shniubobo
Browse files

refactor(web_api): Optimize `Dockerfile`

parent f559fd9c
# Use the official Ubuntu base image
FROM ubuntu:22.04
# Named stage `base`; the later `FROM base AS ...` stages start from it
FROM python:3.10-slim-bookworm AS base
# Set environment variables to non-interactive to avoid prompts during installation
ENV DEBIAN_FRONTEND=noninteractive
# key=value form: the space-separated `ENV key value` syntax is legacy
ENV LANG=C.UTF-8
# Relative destinations in the following instructions resolve against /app
WORKDIR /app
# Non-interactive apt and a UTF-8 locale
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8
# Python: write no .pyc files and keep stdout/stderr unbuffered
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
# pip: skip the new-version check and keep no download cache
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1
FROM base AS build
# Refresh the apt index, install the OS packages, then drop the apt caches
# in the same layer.
RUN apt-get -q update \
    && apt-get -q install -y --no-install-recommends \
        build-essential \
        software-properties-common \
    # Disabled steps, kept for reference:
    #   gpg
    #   add-apt-repository ppa:deadsnakes/ppa
    && apt-get update \
    && apt-get install -y \
        python3.10 \
        python3.10-venv \
        python3.10-distutils \
        python3-pip \
        wget \
        git \
        libgl1 \
        libglib2.0-0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Set Python 3.10 as the default python3: registers /usr/bin/python3.10 as an
# alternative for /usr/bin/python3 with priority 1
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
# Create a virtual environment for MinerU and install packages:
#   1. create the environment at /opt/mineru_venv
#   2. point pip's global index at the Aliyun PyPI mirror
#   3. inside the activated environment: upgrade pip, install magic-pdf[full]
#      (with the extra wheel index) and the web-server packages, then replace
#      paddlepaddle with paddlepaddle-gpu 3.0.0b1 from the cu118 package index
RUN python3 -m venv /opt/mineru_venv && \
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
/bin/bash -c "source /opt/mineru_venv/bin/activate && \
pip install --upgrade pip && \
pip install magic-pdf[full] --extra-index-url https://myhloli.github.io/wheels/ --no-cache-dir && \
pip install fastapi uvicorn python-multipart --no-cache-dir && \
pip uninstall paddlepaddle -y && \
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ --no-cache-dir"
# Copy files from the build context into the image: the models and layoutreader
# directories go under /opt, while the .paddleocr directory, the application
# (app.py) and its configuration file (magic-pdf.json) go under /root
COPY models/models /opt/models
COPY layoutreader /opt/layoutreader
COPY .paddleocr /root/.paddleocr
COPY app.py /root/app.py
COPY magic-pdf.json /root/magic-pdf.json
# Install build-essential and remove the apt caches in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install the Python dependencies into a virtual environment at /app/venv,
# then replace paddlepaddle with the paddlepaddle-gpu build from the cu118
# package index.
COPY requirements.txt .
RUN python -m venv /app/venv \
    && . /app/venv/bin/activate \
    && pip install -r requirements.txt \
    && pip uninstall -y paddlepaddle \
    && pip install \
        -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ \
        paddlepaddle-gpu==3.0.0rc1
# Relative paths in the instructions below resolve against /root
WORKDIR /root
# Download models: copy the download script into the working directory and run
# it with the /app/venv environment active. The script passes /opt/ and
# /opt/layoutreader/ as its download target directories.
COPY download_models.py .
RUN . /app/venv/bin/activate && \
./download_models.py
# Create the models directory (disabled)
# RUN mkdir -p /opt/models
# Final stage: starts again from `base`, so it contains only what is copied or
# installed below
FROM base AS prod
# Disabled entry point that activated the /opt/mineru_venv environment before
# running the given command:
# ENTRYPOINT ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && exec \"$@\" && python3 app.py", "--"]
# Copy Python dependencies and models from the build stage
COPY --from=build /app/venv /app/venv
COPY --from=build /opt/models /opt/models
COPY --from=build /opt/layoutreader /opt/layoutreader
# Install the shared libraries needed at run time, then remove the apt caches
# in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Create volume for paddleocr models: the directory is created first and then
# declared as a volume mount point
RUN mkdir -p /root/.paddleocr
VOLUME [ "/root/.paddleocr" ]
# Copy the app and its configuration file: the entry-point script and app.py go
# to /app, magic-pdf.json goes to /root
COPY entrypoint.sh /app/entrypoint.sh
COPY magic-pdf.json /root/magic-pdf.json
COPY app.py /app/app.py
# Document the port that FastAPI will run on (EXPOSE by itself does not publish it)
EXPOSE 8000
# Command to run FastAPI using Uvicorn, pointing to app.py and binding to 0.0.0.0:8000;
# the /opt/mineru_venv environment is activated first
CMD ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && uvicorn app:app --host 0.0.0.0 --port 8000"]
\ No newline at end of file
# /app/entrypoint.sh activates /app/venv and execs `uvicorn app:app "$@"`;
# CMD supplies the default arguments and can be replaced at `docker run`
ENTRYPOINT [ "/app/entrypoint.sh" ]
CMD ["--host", "0.0.0.0", "--port", "8000"]
#!/usr/bin/env python
"""Download the model files used by the service from the Hugging Face Hub."""
from huggingface_hub import snapshot_download


def main():
    """Fetch both model snapshots and print the directories they landed in."""
    # Only these sub-trees of the PDF-Extract-Kit repository are downloaded.
    mineru_patterns = [
        "models/Layout/LayoutLMv3/*",
        "models/Layout/YOLO/*",
        "models/MFD/YOLO/*",
        "models/MFR/unimernet_small_2501/*",
        "models/TabRec/TableMaster/*",
        "models/TabRec/StructEqTable/*",
    ]
    # Only config and weight files are taken from the layoutreader repository.
    layoutreader_pattern = [
        "*.json",
        "*.safetensors",
    ]

    snapshot_root = snapshot_download(
        "opendatalab/PDF-Extract-Kit-1.0",
        allow_patterns=mineru_patterns,
        local_dir="/opt/",
    )
    layoutreader_model_dir = snapshot_download(
        "hantian/layoutreader",
        allow_patterns=layoutreader_pattern,
        local_dir="/opt/layoutreader/",
    )

    # The models live in the "models" sub-directory of the first snapshot.
    model_dir = snapshot_root + "/models"
    print(f"model_dir is: {model_dir}")
    print(f"layoutreader_model_dir is: {layoutreader_model_dir}")


if __name__ == "__main__":
    main()
#!/usr/bin/env bash
# Container entry point: activate the virtual environment, then replace this
# shell with uvicorn, forwarding every argument given to the script.

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -o errexit -o nounset -o pipefail

source /app/venv/bin/activate

exec uvicorn app:app "$@"
......@@ -7,7 +7,7 @@
"layoutreader-model-dir":"/opt/layoutreader",
"device-mode":"cuda",
"layout-config": {
"model": "layoutlmv3"
"model": "doclayout_yolo"
},
"formula-config": {
"mfd_model": "yolo_v8_mfd",
......@@ -16,8 +16,29 @@
},
"table-config": {
"model": "rapid_table",
"enable": false,
"sub_model": "slanet_plus",
"enable": true,
"max_time": 400
},
"config_version": "1.0.0"
"llm-aided-config": {
"formula_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-7b-instruct",
"enable": false
},
"text_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-7b-instruct",
"enable": false
},
"title_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-32b-instruct",
"enable": false
}
},
"config_version": "1.1.1"
}
# Additional package index searched alongside the default one
--extra-index-url https://myhloli.github.io/wheels/
# magic-pdf together with its "full" extra
magic-pdf[full]
# Web server packages; the container entry point launches `uvicorn app:app`
fastapi
uvicorn
python-multipart
docker run -itd --name=mineru_server --gpus=all -p 8888:8000 quincyqiang/mineru:0.1-models /bin/bash
docker run -itd --name=mineru_server --gpus=all -p 8888:8000 quincyqiang/mineru:0.3-models
docker login --username=1185918903@qq.com registry.cn-beijing.aliyuncs.com
docker tag quincyqiang/mineru:0.3-models registry.cn-beijing.aliyuncs.com/quincyqiang/gomate:0.3-models
docker push registry.cn-beijing.aliyuncs.com/quincyqiang/gomate:0.3-models
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment