Dockerfile 5.77 KB
Newer Older
Timothy J. Baek's avatar
Timothy J. Baek committed
1
# syntax=docker/dockerfile:1
2
# Initialize device type args
Jannik Streidl's avatar
grammar  
Jannik Streidl committed
3
# use build args in the docker build command with --build-arg="BUILDARG=true"
4
5
ARG USE_CUDA=false
ARG USE_MPS=false
6
ARG INCLUDE_OLLAMA=false
Timothy J. Baek's avatar
Timothy J. Baek committed
7

Jannik Streidl's avatar
Jannik Streidl committed
8
######## WebUI frontend ########
Jannik S's avatar
Jannik S committed
9
# Stage "build": Node.js image used to install dependencies and build the WebUI frontend.
# The AS keyword is uppercase to match the casing of FROM.
FROM node:21-alpine3.19 AS build
10

Timothy J. Baek's avatar
Timothy J. Baek committed
11
12
WORKDIR /app

13
14
15
16
#RUN apt-get update \ 
#    && apt-get install -y --no-install-recommends wget \ 
#    # cleanup
#    && rm -rf /var/lib/apt/lists/*
Jannik Streidl's avatar
Jannik Streidl committed
17

Timothy J. Baek's avatar
Timothy J. Baek committed
18
# wget embedding model weight from alpine (does not exist from slim-buster)
Jannik Streidl's avatar
Jannik Streidl committed
19
20
#RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
#    tar -xzf - -C /app
Timothy J. Baek's avatar
Timothy J. Baek committed
21

Xiaodong Ye's avatar
Xiaodong Ye committed
22
COPY package.json package-lock.json ./
23
RUN npm ci
24

25
26
# Copy the remaining build context into the working directory. This happens
# after `npm ci`, so the dependency layer stays cached when only sources change.
COPY . .
# Run the "build" script from package.json; /app/build is copied out of this
# stage by the backend stage.
RUN npm run build
Timothy J. Baek's avatar
Timothy J. Baek committed
27

Jannik Streidl's avatar
Jannik Streidl committed
28
######## WebUI backend ########
Timothy J. Baek's avatar
Timothy J. Baek committed
29
# Stage "base": Python runtime image for the backend; it also receives the
# built frontend from the "build" stage.
# The AS keyword is uppercase to match the casing of FROM.
FROM python:3.11-slim-bookworm AS base
Timothy J. Baek's avatar
Timothy J. Baek committed
30

31
32
33
# Use args
ARG USE_CUDA
ARG USE_MPS
34
ARG INCLUDE_OLLAMA
35

Jannik Streidl's avatar
Jannik Streidl committed
36
37
## Basis ##
ENV ENV=prod \
38
39
    PORT=8080 \
    INCLUDE_OLLAMA_ENV=${INCLUDE_OLLAMA}
Timothy J. Baek's avatar
Timothy J. Baek committed
40

Jannik Streidl's avatar
Jannik Streidl committed
41
42
43
## Base URL configuration ##
# Image defaults: OLLAMA_BASE_URL is "/ollama", OPENAI_API_BASE_URL is empty.
ENV \
    OLLAMA_BASE_URL="/ollama" \
    OPENAI_API_BASE_URL=""
Timothy J. Baek's avatar
Timothy J. Baek committed
44

Jannik Streidl's avatar
Jannik Streidl committed
45
46
47
48
49
## API key, secret and tracking configuration ##
# OPENAI_API_KEY and WEBUI_SECRET_KEY default to empty strings in the image.
# SCARF_NO_ANALYTICS and DO_NOT_TRACK default to true
# (presumably telemetry opt-outs - confirm against the consuming libraries).
ENV \
    OPENAI_API_KEY="" \
    WEBUI_SECRET_KEY="" \
    SCARF_NO_ANALYTICS=true \
    DO_NOT_TRACK=true
Timothy J. Baek's avatar
Timothy J. Baek committed
50

Jannik Streidl's avatar
Jannik Streidl committed
51
#### Preloaded models #########################################################
Jannik Streidl's avatar
Jannik Streidl committed
52
53
54
## Whisper STT (speech-to-text) Settings ##
ENV WHISPER_MODEL="base" \
    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
55

Jannik Streidl's avatar
Jannik Streidl committed
56
## RAG Embedding Model Settings ##
57
58
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard 
59
# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
60
# IMPORTANT: If you switch away from the default model (all-MiniLM-L6-v2), or back to it, you can no longer use RAG Chat with the documents previously loaded in the WebUI! You need to re-embed them.
Jannik Streidl's avatar
Jannik Streidl committed
61
62
ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" \
    RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
Jannik Streidl's avatar
Jannik Streidl committed
63
64
65
66
67
    SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" \
    # device type for Whisper STT and embedding models - "cpu" (default) or "mps" (apple silicon) - choosing this right can lead to better performance
    # Important:
    #  If you want to use CUDA you need to install the nvidia-container-toolkit (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
    #  you can set this to "cuda" but it's recommended to use the --build-arg USE_CUDA=true flag when building the image
68
69
    RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" \
    DEVICE_COMPUTE_TYPE="int8"
Jannik Streidl's avatar
Jannik Streidl committed
70
# device type for Whisper STT and embedding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance
Jannik Streidl's avatar
Jannik Streidl committed
71
#### Preloaded models ##########################################################
72

Timothy J. Baek's avatar
Timothy J. Baek committed
73
WORKDIR /app/backend
74
# install python dependencies
Timothy J. Baek's avatar
Timothy J. Baek committed
75
COPY ./backend/requirements.txt ./requirements.txt
Timothy J. Baek's avatar
Timothy J. Baek committed
76

77
78
79
80
81
82
83
84
85
86
# Install PyTorch and the backend requirements, then preload the models.
#
# Build args consumed (declared earlier in this stage):
#   USE_CUDA=true -> torch wheels from the cu117 index; no model preload.
#   USE_MPS=true  -> CPU torch wheels; models preloaded with DEVICE_TYPE="mps".
#   otherwise     -> CPU torch wheels; models preloaded with DEVICE_TYPE="cpu".
# USE_CUDA takes precedence over USE_MPS when both are set.
#
# DEVICE_TYPE is exported only inside this RUN shell; it does not persist
# into later layers or the running container.
# NOTE(review): with USE_MPS=true the embedding model is instantiated with
# device "mps" at build time - confirm that backend is available in the
# build environment.
RUN if [ "$USE_CUDA" = "true" ]; then \
        TORCH_INDEX_URL="https://download.pytorch.org/whl/cu117"; \
    else \
        TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"; \
    fi && \
    pip3 install torch torchvision torchaudio --index-url "$TORCH_INDEX_URL" --no-cache-dir && \
    pip3 install -r requirements.txt --no-cache-dir && \
    if [ "$USE_CUDA" != "true" ]; then \
        if [ "$USE_MPS" = "true" ]; then \
            export DEVICE_TYPE="mps"; \
        else \
            export DEVICE_TYPE="cpu"; \
        fi && \
        # preload the whisper model into WHISPER_MODEL_DIR
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        # preload the RAG embedding model
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['DEVICE_TYPE'])"; \
    fi

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

# Install the OS packages the backend needs and, when the image is built with
# --build-arg INCLUDE_OLLAMA=true, install Ollama as well.
#
# The shared package list is installed once; only the Ollama step is
# conditional. The apt package lists are removed in the same layer.
RUN apt-get update && \
    # pandoc and netcat, plus ffmpeg/libsm6/libxext6 for RAG OCR
    apt-get install -y --no-install-recommends \
        ffmpeg \
        libsm6 \
        libxext6 \
        netcat-openbsd \
        pandoc && \
    if [ "$INCLUDE_OLLAMA" = "true" ]; then \
        # curl is only needed to fetch the Ollama installer
        apt-get install -y --no-install-recommends curl && \
        # Download first, then execute: piping curl into sh under /bin/sh
        # (no pipefail) would hide a failed download and let the build pass
        # without Ollama installed.
        curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh && \
        sh /tmp/ollama-install.sh && \
        rm -f /tmp/ollama-install.sh; \
    fi && \
    # cleanup
    rm -rf /var/lib/apt/lists/*
Timothy J. Baek's avatar
Timothy J. Baek committed
117

Jannik Streidl's avatar
Jannik Streidl committed
118

119

120
# copy embedding weight from build
Jannik Streidl's avatar
Jannik Streidl committed
121
122
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
123
124
125

# copy built frontend files
COPY --from=build /app/build /app/build
126
127
COPY --from=build /app/CHANGELOG.md /app/CHANGELOG.md
COPY --from=build /app/package.json /app/package.json
128
129

# copy backend files
Timothy J. Baek's avatar
Timothy J. Baek committed
130
131
COPY ./backend .

Jannik S's avatar
Jannik S committed
132
133
EXPOSE 8080

134
# Default command: run start.sh with bash, in exec (JSON-array) form.
# NOTE(review): start.sh is expected to arrive in the working directory via
# `COPY ./backend .` - confirm.
CMD [ "bash", "start.sh"]