"src/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "bf1c79a6ad9a89e6c122f4d374c3593be7b09ee8"
Commit f6cef312 authored by Joseph Young

Optimize Dockerfile for CUDA support

Refactored the Dockerfile to better organize and streamline environment variable settings, emphasizing support for a CUDA-based WebUI backend while retaining the ability to build a CPU-only image. Consolidated ENV instructions to reduce image layers and improve build efficiency, and set a default PORT environment variable so the container works out of the box. Exposed the backend service on port 8080 and combined RUN directives to minimize the image footprint. These changes make deployment more robust for both CPU and CUDA environments.
parent 75a40dea
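
As a usage sketch (not part of the commit): the image tag below is an assumption, `--gpus all` requires the NVIDIA Container Toolkit on the host, and the port mapping follows the new default PORT=8080 and the EXPOSE 8080 directive.

# Build the image from the repository root (tag name is illustrative, not from this commit)
$ docker build -t open-webui:cuda .

# Run with GPU access; requires the NVIDIA Container Toolkit on the host.
# -p maps a host port to the container's new default PORT=8080.
$ docker run --rm --gpus all -p 8080:8080 open-webui:cuda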
@@ -11,48 +11,53 @@ RUN npm ci
 COPY . .
 RUN npm run build
 
-######## WebUI backend ########
+######## CPU-only WebUI backend ########
+# To support both CPU and GPU backends, we need to keep the ability to build the CPU-only image.
+#FROM python:3.11-slim-bookworm as base
+#FROM --platform=linux/amd64 ubuntu:22.04 AS cpu-builder-amd64
+#FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
+#RUN OPENWEBUI_CPU_TARGET="cpu" sh gen_linux.sh
+#FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
+#RUN OPENWEBUI_CPU_TARGET="cpu_avx" sh gen_linux.sh
+#FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
+#RUN OPENWEBUI_CPU_TARGET="cpu_avx2" sh gen_linux.sh
+
+######## CUDA WebUI backend ########
 ARG CUDA_VERSION=12.3.2
+#FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu22.04 as base
 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-ubuntu22.04 AS cuda-build-amd64
 
 # Set environment variables for NVIDIA Container Toolkit
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
-ENV NVIDIA_DRIVER_CAPABILITIES=all
-ENV NVIDIA_VISIBLE_DEVICES=all
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 \
+    NVIDIA_DRIVER_CAPABILITIES=all \
+    NVIDIA_VISIBLE_DEVICES=all
+
+# Install NVIDIA CUDA toolkit and libraries in the container
+#RUN apt-get update && \
+#    apt-get install -y --no-install-recommends nvidia-cuda-toolkit nvidia-cuda-dev nvidia-cudnn-dev
 
-ENV ENV=prod
-ENV PORT ""
-ENV OLLAMA_BASE_URL "/ollama"
-ENV OPENAI_API_BASE_URL ""
-ENV OPENAI_API_KEY ""
-ENV WEBUI_SECRET_KEY ""
-ENV SCARF_NO_ANALYTICS true
-ENV DO_NOT_TRACK true
+ENV ENV=prod \
+    PORT=8080
+
+## Base URL Config ##
+ENV OLLAMA_BASE_URL="/ollama" \
+    OPENAI_API_BASE_URL=""
+
+## API Key and Security Config ##
+ENV OPENAI_API_KEY="" \
+    WEBUI_SECRET_KEY="" \
+    SCARF_NO_ANALYTICS=true \
+    DO_NOT_TRACK=true
 
 ######## Preloaded models ########
 # whisper TTS Settings
-ENV WHISPER_MODEL="base"
-ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
+ENV WHISPER_MODEL="base" \
+    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
 
 # RAG Embedding Model Settings
 # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
 # Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
 # for better performance and multilanguage support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
 # IMPORTANT: If you change the default model (all-MiniLM-L6-v2), you can't use RAG Chat with documents previously loaded in the WebUI; you need to re-embed them.
-ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2"
+ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" \
 # device type for whisper tts and embedding models - "cpu" (default), "cuda" (NVIDIA GPU and CUDA required), or "mps" (apple silicon) - choosing the right one can lead to better performance
-ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cuda"
-ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models"
-ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR
+    RAG_EMBEDDING_MODEL_DEVICE_TYPE="cuda" \
+    RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
+    SENTENCE_TRANSFORMERS_HOME=$RAG_EMBEDDING_MODEL_DIR
 
 ######## Preloaded models ########
 WORKDIR /app/backend
@@ -63,12 +68,8 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/*
 
 COPY ./backend/requirements.txt ./requirements.txt
-RUN pip3 install torch torchvision torchaudio --no-cache-dir
-RUN pip3 install -r requirements.txt --no-cache-dir
-
-# copy embedding weight from build
-RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
-COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
+RUN pip3 install torch torchvision torchaudio --no-cache-dir && \
+    pip3 install -r requirements.txt --no-cache-dir
 
 # copy built frontend files
 COPY --from=build /app/build /app/build
@@ -78,4 +79,6 @@ COPY --from=build /app/package.json /app/package.json
 
 # copy backend files
 COPY ./backend .
 
+EXPOSE 8080
+
 CMD [ "bash", "start.sh"]