# syntax=docker/dockerfile:1
# Initialize device type args
# use build args in the docker build command with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_MPS=false
ARG INCLUDE_OLLAMA=false
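# Example build command (illustrative; the "open-webui" image tag is only a placeholder):
#   docker build --build-arg USE_CUDA=true --build-arg INCLUDE_OLLAMA=true -t open-webui .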

######## WebUI frontend ########
FROM node:21-alpine3.19 as build

WORKDIR /app

#RUN apt-get update \ 
#    && apt-get install -y --no-install-recommends wget \ 
#    # cleanup
#    && rm -rf /var/lib/apt/lists/*

# wget the embedding model weights in the alpine stage (wget is not available in slim-buster)
#RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
#    tar -xzf - -C /app

COPY package.json package-lock.json ./
RUN npm ci

COPY . .
RUN npm run build

######## WebUI backend ########
FROM python:3.11-slim-bookworm as base

# Use args
ARG USE_CUDA
ARG USE_MPS
ARG INCLUDE_OLLAMA

## Base Config ##
ENV ENV=prod \
    PORT=8080 \
    # pass build args through to the runtime environment
    INCLUDE_OLLAMA_DOCKER=${INCLUDE_OLLAMA} \
    USE_MPS_DOCKER=${USE_MPS} \
    USE_CUDA_DOCKER=${USE_CUDA}

## Base URL Config ##
ENV OLLAMA_BASE_URL="/ollama" \
    OPENAI_API_BASE_URL=""
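# These are plain ENV values, so they can also be overridden at container run time without rebuilding,
# e.g. (illustrative; URL, port mapping and image name are placeholders for an external Ollama setup):
#   docker run -e OLLAMA_BASE_URL="http://host.docker.internal:11434" -p 3000:8080 <image>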

## API Key and Security Config ##
ENV OPENAI_API_KEY="" \
    WEBUI_SECRET_KEY="" \
    SCARF_NO_ANALYTICS=true \
    DO_NOT_TRACK=true

#### Preloaded models #########################################################
## Whisper STT Settings ##
ENV WHISPER_MODEL="base" \
    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"

## RAG Embedding Model Settings ##
# any sentence-transformers model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you change the default model (all-MiniLM-L6-v2), documents already loaded in the WebUI can no longer be used for RAG Chat until you re-embed them.
ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" \
    RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
    SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" \
    # device type for the Whisper STT and embedding models - "cpu" (default) or "mps" (apple silicon) - choosing the right device can lead to better performance
    # Important:
    #  If you want to use CUDA you need to install the nvidia-container-toolkit (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
    #  you can set this to "cuda", but it is recommended to use the --build-arg USE_CUDA=true flag when building the image
    # RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" \
    DEVICE_COMPUTE_TYPE="int8"
# device type for the Whisper STT and embedding models - "cpu" (default), "cuda" (NVIDIA GPU and CUDA required) or "mps" (apple silicon) - choosing the right device can lead to better performance
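# Illustrative run commands for the settings above (image name and port mapping are placeholders):
#   switch the embedding model at runtime (previously loaded documents must be re-embedded):
#     docker run -e RAG_EMBEDDING_MODEL="intfloat/multilingual-e5-base" -p 3000:8080 <image>
#   run a CUDA build with GPU access (requires the nvidia-container-toolkit on the host):
#     docker run --gpus all -p 3000:8080 <image>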
#### Preloaded models ##########################################################

WORKDIR /app/backend
# install python dependencies
COPY ./backend/requirements.txt ./requirements.txt

RUN if [ "$USE_CUDA" = "true" ]; then \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 --no-cache-dir && \
        pip3 install -r requirements.txt --no-cache-dir; \
    elif [ "$USE_MPS" = "true" ]; then \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
        pip3 install -r requirements.txt --no-cache-dir && \
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='mps')"; \
    else \
        pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
        pip3 install -r requirements.txt --no-cache-dir && \
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
    fi


RUN if [ "$INCLUDE_OLLAMA" = "true" ]; then \
        apt-get update && \
        # Install pandoc and netcat
        apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
        # for RAG OCR
        apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
        # install helper tools
        apt-get install -y --no-install-recommends curl && \
        # install ollama
        curl -fsSL https://ollama.com/install.sh | sh && \
        # cleanup
        rm -rf /var/lib/apt/lists/*; \
    else \
        apt-get update && \
        # Install pandoc and netcat
        apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
        # for RAG OCR
        apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
        # cleanup
        rm -rf /var/lib/apt/lists/*; \
    fi



# copy embedding weights from the build stage
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx

# copy built frontend files
COPY --from=build /app/build /app/build
COPY --from=build /app/CHANGELOG.md /app/CHANGELOG.md
COPY --from=build /app/package.json /app/package.json

# copy backend files
COPY ./backend .

EXPOSE 8080

CMD [ "bash", "start.sh"]