# syntax=docker/dockerfile:1
# Initialize device type args
# use build args in the docker build command with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_CUDA_VER=cu121
ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2
ARG USE_MPS=false
ARG INCLUDE_OLLAMA=false

######## WebUI frontend ########
FROM node:21-alpine3.19 AS build

WORKDIR /app

#RUN apt-get update \
#    && apt-get install -y --no-install-recommends wget \
#    # cleanup
#    && rm -rf /var/lib/apt/lists/*

# wget embedding model weight from alpine (does not exist from slim-buster)
#RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
#    tar -xzf - -C /app

# Copy only the dependency manifests first so the npm install layer stays
# cached until package*.json changes.
COPY package.json package-lock.json ./
RUN npm ci

COPY . .
RUN npm run build

######## WebUI backend ########
FROM python:3.11-slim-bookworm AS base

# Re-declare the build args inside this stage: ARGs declared before FROM
# are only visible in FROM lines, not in stage bodies.
ARG USE_CUDA
ARG USE_CUDA_VER
ARG USE_EMBEDDING_MODEL
ARG USE_MPS
ARG INCLUDE_OLLAMA

## Basis ##
ENV ENV=prod \
    PORT=8080 \
    # pass build args to the build
    INCLUDE_OLLAMA_DOCKER=${INCLUDE_OLLAMA} \
    USE_MPS_DOCKER=${USE_MPS} \
    USE_CUDA_DOCKER=${USE_CUDA} \
    USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
    USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL}

## Basis URL Config ##
ENV OLLAMA_BASE_URL="/ollama" \
    OPENAI_API_BASE_URL=""

## API Key and Security Config ##
ENV OPENAI_API_KEY="" \
    WEBUI_SECRET_KEY="" \
    SCARF_NO_ANALYTICS=true \
    DO_NOT_TRACK=true

#### Preloaded models #########################################################

## whisper TTS Settings ##
ENV WHISPER_MODEL="base" \
    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"

## RAG Embedding Model Settings ##
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilanguage support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
    RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
    SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" \
    # device type for whisper tts and embedding models - "cpu" (default) or "mps" (apple silicon) - choosing this right can lead to better performance
    # Important:
    #  If you want to use CUDA you need to install the nvidia-container-toolkit (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
    #  you can set this to "cuda" but it's recommended to use the --build-arg USE_CUDA=true flag when building the image
    # RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" \
    DEVICE_COMPUTE_TYPE="int8"
# device type for whisper tts and embedding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance
#### Preloaded models ##########################################################

WORKDIR /app/backend

# install python dependencies
COPY ./backend/requirements.txt ./requirements.txt

# Install torch from the wheel index matching the selected device, install the
# backend requirements, then pre-download the Whisper and embedding models into
# the image cache so first startup needs no network access.
# NOTE: models are always preloaded with device='cpu' (or 'mps') because no
# GPU is available at image-build time.
RUN if [ "$USE_CUDA" = "true" ]; then \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
        # use && (not ;) so a failed requirements install aborts the build
        pip3 install -r requirements.txt --no-cache-dir && \
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
    elif [ "$USE_MPS" = "true" ]; then \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
        pip3 install -r requirements.txt --no-cache-dir && \
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='mps')"; \
    else \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
        pip3 install -r requirements.txt --no-cache-dir && \
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
    fi

# Install runtime OS packages in a single layer; the apt lists are removed in
# the same layer so they never persist in the image. The common packages were
# previously duplicated across both branches of the INCLUDE_OLLAMA check.
RUN apt-get update && \
    # Install pandoc and netcat
    apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
    # for RAG OCR
    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
    if [ "$INCLUDE_OLLAMA" = "true" ]; then \
        # install helper tools
        apt-get install -y --no-install-recommends curl && \
        # install ollama
        # SECURITY NOTE(review): this pipes a remote script into sh with no
        # checksum or version pin -- consider pinning a specific release.
        curl -fsSL https://ollama.com/install.sh | sh; \
    fi && \
    # cleanup
    rm -rf /var/lib/apt/lists/*

# copy embedding weight from build
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx

# copy built frontend files
COPY --from=build /app/build /app/build
COPY --from=build /app/CHANGELOG.md /app/CHANGELOG.md
COPY --from=build /app/package.json /app/package.json

# copy backend files
COPY ./backend .

# EXPOSE is documentation only; publish the port with -p/-P at run time.
EXPOSE 8080

# exec-form CMD so start.sh runs as PID 1 and receives SIGTERM on docker stop
CMD ["bash", "start.sh"]