# syntax=docker/dockerfile:1

# Initialize device type args.
# These ARGs are declared before any FROM so they can be shared by all stages;
# note pre-FROM ARGs are NOT visible inside a stage unless re-declared there.
#   USE_CUDA       - install CUDA-enabled torch wheels
#   USE_MPS        - Apple-silicon (mps) device selection; torch cpu wheels
#   INCLUDE_OLLAMA - additionally install ollama into the image
ARG USE_CUDA=false
ARG USE_MPS=false
ARG INCLUDE_OLLAMA=false

######## WebUI frontend ########
FROM node:21-alpine3.19 AS build

WORKDIR /app

#RUN apt-get update \
#    && apt-get install -y --no-install-recommends wget \
#    # cleanup
#    && rm -rf /var/lib/apt/lists/*

# wget embedding model weight from alpine (does not exist from slim-buster)
#RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
#    tar -xzf - -C /app

# Copy only the lockfiles first so the `npm ci` layer stays cached
# until dependencies actually change, not on every source edit.
COPY package.json package-lock.json ./
RUN npm ci

# Copy the rest of the source and produce the static frontend build
# (consumed later via COPY --from=build).
COPY . .
RUN npm run build
######## WebUI backend ########
FROM python:3.11-slim-bookworm AS base

# Re-declare the build args inside this stage — ARGs declared before the
# first FROM are not inherited by stages automatically.
ARG USE_CUDA
ARG USE_MPS
ARG INCLUDE_OLLAMA

## Basis ##
# INCLUDE_OLLAMA_ENV persists the build-time choice into the runtime
# environment so start.sh can react to it.
ENV ENV=prod \
    PORT=8080 \
    INCLUDE_OLLAMA_ENV=${INCLUDE_OLLAMA}

## Basis URL Config ##
ENV OLLAMA_BASE_URL="/ollama" \
    OPENAI_API_BASE_URL=""

## API Key and Security Config ##
# Keys default to empty and are expected to be provided at runtime;
# analytics/telemetry is opted out by default.
ENV OPENAI_API_KEY="" \
    WEBUI_SECRET_KEY="" \
    SCARF_NO_ANALYTICS=true \
    DO_NOT_TRACK=true
#### Preloaded models #########################################################

## whisper TTS Settings ##
ENV WHISPER_MODEL="base" \
    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"

## RAG Embedding Model Settings ##
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilanguage support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" \
    RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
    SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" \
    # device type for whisper tts and embedding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance
    # Important:
    #  If you want to use CUDA you need to install the nvidia-container-toolkit (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
    #  you can set this to "cuda" but it's recommended to use the --build-arg USE_CUDA=true flag when building the image
    RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" \
    DEVICE_COMPUTE_TYPE="int8"
#### Preloaded models ##########################################################
WORKDIR /app/backend

# install python dependencies
COPY ./backend/requirements.txt ./requirements.txt

# Select the torch wheel index per device type, install requirements, and
# pre-download the whisper + sentence-transformers models into the image so
# first startup does not need network access.
# NOTE(review): the CUDA branch does NOT pre-download the models (presumably
# because no GPU is available at build time), so they are fetched on first
# run instead — confirm this asymmetry is intentional.
# NOTE(review): DEVICE_TYPE is a build-shell export only; it does not persist
# into the runtime environment (RAG_EMBEDDING_MODEL_DEVICE_TYPE stays "cpu").
RUN if [ "$USE_CUDA" = "true" ]; then \
        export DEVICE_TYPE="cuda" && \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 --no-cache-dir && \
        pip3 install -r requirements.txt --no-cache-dir; \
    elif [ "$USE_MPS" = "true" ]; then \
        export DEVICE_TYPE="mps" && \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
        pip3 install -r requirements.txt --no-cache-dir && \
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['DEVICE_TYPE'])"; \
    else \
        export DEVICE_TYPE="cpu" && \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
        pip3 install -r requirements.txt --no-cache-dir && \
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['DEVICE_TYPE'])"; \
    fi
# OS-level dependencies. The common package set was previously duplicated in
# both branches of the INCLUDE_OLLAMA conditional; install it once and only
# branch on the ollama-specific additions. apt lists are removed in the same
# layer so the cache never persists in the image.
RUN apt-get update && \
    # Install pandoc and netcat
    apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
    # for RAG OCR
    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
    if [ "$INCLUDE_OLLAMA" = "true" ]; then \
        # install helper tools
        apt-get install -y --no-install-recommends curl && \
        # install ollama
        # NOTE(review): remote script piped straight to sh — unpinned and
        # checksum-unverified; consider pinning a release and verifying it.
        curl -fsSL https://ollama.com/install.sh | sh; \
    fi && \
    # cleanup
    rm -rf /var/lib/apt/lists/*

Jannik Streidl's avatar
Jannik Streidl committed
117

118

119
# copy embedding weight from build
Jannik Streidl's avatar
Jannik Streidl committed
120
121
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
122
123
124

# copy built frontend files
COPY --from=build /app/build /app/build
125
126
COPY --from=build /app/CHANGELOG.md /app/CHANGELOG.md
COPY --from=build /app/package.json /app/package.json
127
128

# copy backend files
Timothy J. Baek's avatar
Timothy J. Baek committed
129
130
COPY ./backend .

Jannik S's avatar
Jannik S committed
131
132
EXPOSE 8080

133
CMD [ "bash", "start.sh"]