Unverified commit 506a0613 authored by lainedfles, committed by GitHub

Merge branch 'dev' into embedding-model-fix-and-manual-update

parents ec530ac9 1a2971ae
......@@ -9,4 +9,8 @@ OPENAI_API_KEY=''
# DO NOT TRACK
SCARF_NO_ANALYTICS=true
DO_NOT_TRACK=true
\ No newline at end of file
DO_NOT_TRACK=true
# Use locally bundled version of the LiteLLM cost map json
# to avoid repetitive startup connections
LITELLM_LOCAL_MODEL_COST_MAP="True"
......@@ -57,3 +57,14 @@ jobs:
path: .
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Trigger Docker build workflow
uses: actions/github-script@v7
with:
script: |
github.rest.actions.createWorkflowDispatch({
owner: context.repo.owner,
repo: context.repo.repo,
workflow_id: 'docker-build.yaml',
ref: 'v${{ steps.get_version.outputs.version }}',
})
#
name: Create and publish a Docker image
name: Create and publish Docker images with specific build args
# Configures this workflow to run every time a change is pushed to the branch called `release`.
on:
workflow_dispatch:
push:
branches:
- main
......@@ -23,7 +23,7 @@ jobs:
permissions:
contents: read
packages: write
#
steps:
- name: Checkout repository
uses: actions/checkout@v4
......@@ -41,12 +41,11 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata for Docker images
id: meta
- name: Extract metadata for Docker images (default latest tag)
id: meta-latest
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
# This configuration dynamically generates tags based on the branch, tag, commit, and custom suffix for lite version.
tags: |
type=ref,event=branch
type=ref,event=tag
......@@ -56,11 +55,29 @@ jobs:
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
- name: Build and push Docker image
- name: Build and push Docker image (latest)
uses: docker/build-push-action@v5
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta-latest.outputs.tags }}
labels: ${{ steps.meta-latest.outputs.labels }}
- name: Build and push Docker image with CUDA
uses: docker/build-push-action@v5
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cuda
build-args: USE_CUDA=true
- name: Build and push Docker image with Ollama
uses: docker/build-push-action@v5
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:ollama
build-args: USE_OLLAMA=true
# syntax=docker/dockerfile:1
# Initialize device type args
# use build args in the docker build command with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_OLLAMA=false
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
ARG USE_CUDA_VER=cu121
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you switch away from the default model (all-MiniLM-L6-v2) or back to it, RAG Chat won't work with documents that were embedded with the previous model in the WebUI; you need to re-embed them.
ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2
FROM node:alpine as build
######## WebUI frontend ########
FROM node:21-alpine3.19 as build
WORKDIR /app
# wget embedding model weight from alpine (does not exist from slim-buster)
RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
tar -xzf - -C /app
COPY package.json package-lock.json ./
RUN npm ci
COPY . .
RUN npm run build
######## WebUI backend ########
FROM python:3.11-slim-bookworm as base
ENV ENV=prod
ENV PORT ""
ENV OLLAMA_BASE_URL "/ollama"
ENV OPENAI_API_BASE_URL ""
ENV OPENAI_API_KEY ""
ENV WEBUI_SECRET_KEY ""
ENV WEBUI_AUTH_TRUSTED_EMAIL_HEADER ""
ENV SCARF_NO_ANALYTICS true
ENV DO_NOT_TRACK true
######## Preloaded models ########
# whisper TTS Settings
ENV WHISPER_MODEL="base"
ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
# RAG Embedding Model Settings
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you switch away from the default model (all-MiniLM-L6-v2) or back to it, RAG Chat won't work with documents that were embedded with the previous model in the WebUI; you need to re-embed them.
ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2"
# device type for whisper tts and embedding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing the right one can lead to better performance
ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu"
ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models"
ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR
######## Preloaded models ########
# Use args
ARG USE_CUDA
ARG USE_OLLAMA
ARG USE_CUDA_VER
ARG USE_EMBEDDING_MODEL
## Basis ##
ENV ENV=prod \
PORT=8080 \
# pass build args to the build
USE_OLLAMA_DOCKER=${USE_OLLAMA} \
USE_CUDA_DOCKER=${USE_CUDA} \
USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL}
## Basis URL Config ##
ENV OLLAMA_BASE_URL="/ollama" \
OPENAI_API_BASE_URL=""
## API Key and Security Config ##
ENV OPENAI_API_KEY="" \
WEBUI_SECRET_KEY="" \
SCARF_NO_ANALYTICS=true \
DO_NOT_TRACK=true
# Use locally bundled version of the LiteLLM cost map json
# to avoid repetitive startup connections
ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
#### Other models #########################################################
## whisper TTS model settings ##
ENV WHISPER_MODEL="base" \
WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
## RAG Embedding model settings ##
ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
#### Other models ##########################################################
WORKDIR /app/backend
# install python dependencies
COPY ./backend/requirements.txt ./requirements.txt
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
RUN if [ "$USE_CUDA" = "true" ]; then \
# If you use CUDA, the whisper and embedding models will be downloaded on first use
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
pip3 install -r requirements.txt --no-cache-dir && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
else \
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
pip3 install -r requirements.txt --no-cache-dir && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
fi
RUN if [ "$USE_OLLAMA" = "true" ]; then \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# install helper tools
apt-get install -y --no-install-recommends curl && \
# install ollama
curl -fsSL https://ollama.com/install.sh | sh && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
else \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
fi
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
RUN pip3 install -r requirements.txt --no-cache-dir
# Install pandoc and netcat
# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
RUN apt-get update \
&& apt-get install -y pandoc netcat-openbsd \
&& rm -rf /var/lib/apt/lists/*
# preload embedding model
RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['RAG_EMBEDDING_MODEL_DEVICE_TYPE'])"
# preload tts model
RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='auto', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"
# copy embedding weight from build
RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
# copy built frontend files
COPY --from=build /app/build /app/build
......@@ -82,4 +120,6 @@ COPY --from=build /app/package.json /app/package.json
# copy backend files
COPY ./backend .
CMD [ "bash", "start.sh"]
EXPOSE 8080
CMD [ "bash", "start.sh"]
\ No newline at end of file
......@@ -113,6 +113,65 @@ Don't forget to explore our sibling project, [Open WebUI Community](https://open
- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
- **If you want to customize your build with additional args**, use these commands:
> [!NOTE]
> If you only want to use Open WebUI with Ollama included or with CUDA acceleration, it's recommended to use our official images with the `:ollama` or `:cuda` tags (see the pull example below).
> If you want a combination of both, or more customization options such as a different embedding model and/or CUDA version, you need to build the image yourself following the instructions below.
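The image name below is inferred from the `docker run` commands later in this section; double-check the available tags on the registry before pulling:

```bash
# Prebuilt variants published by the Docker build workflow above
docker pull ghcr.io/open-webui/open-webui:cuda
docker pull ghcr.io/open-webui/open-webui:ollama
```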
**For the build:**
```bash
docker build -t open-webui .
```
Optional build args (add them to the `docker build` command above as needed), for example:
```bash
--build-arg="USE_EMBEDDING_MODEL=intfloat/multilingual-e5-large"
```
For "intfloat/multilingual-e5-large" custom embedding model (default is all-MiniLM-L6-v2), only works with [sentence transforer models](https://huggingface.co/models?library=sentence-transformers). Current [Leaderbord](https://huggingface.co/spaces/mteb/leaderboard) of embedding models.
```bash
--build-arg="USE_OLLAMA=true"
```
Includes Ollama in the image.
```bash
--build-arg="USE_CUDA=true"
```
Enables CUDA acceleration for the embedding and Whisper models.
> [!NOTE]
> You need to install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your machine so that Docker can use your GPU. This only works on Linux; use WSL on Windows!
```bash
--build-arg="USE_CUDA_VER=cu117"
```
Builds against CUDA 11 (the default is CUDA 12). A combined build example follows.
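Putting it together, a fully customized build might look like this sketch; include only the args you actually need:

```bash
docker build -t open-webui \
  --build-arg="USE_CUDA=true" \
  --build-arg="USE_CUDA_VER=cu121" \
  --build-arg="USE_OLLAMA=true" \
  --build-arg="USE_EMBEDDING_MODEL=intfloat/multilingual-e5-large" \
  .
```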
**To run the image:**
- **If you DID NOT use the USE_CUDA=true build ARG**, use this command:
```bash
docker run -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
- **If you DID use the USE_CUDA=true build ARG**, use this command:
```bash
docker run --gpus all -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
#### Open WebUI: Server Connection Error
If you're experiencing connection issues, it's often because the WebUI Docker container cannot reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) from inside the container. Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, so the link becomes `http://localhost:8080`.
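For example, a host-network run might look like the following sketch; the `OLLAMA_BASE_URL` override is an assumption for an Ollama server running locally on the host, so adjust it to your setup:

```bash
# With host networking the UI is served on port 8080 directly; no -p mapping is needed.
docker run -d --network=host \
  -v open-webui:/app/backend/data \
  -e OLLAMA_BASE_URL=http://127.0.0.1:11434 \
  --name open-webui --restart always \
  ghcr.io/open-webui/open-webui:main
```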
......
......@@ -28,6 +28,7 @@ from config import (
UPLOAD_DIR,
WHISPER_MODEL,
WHISPER_MODEL_DIR,
DEVICE_TYPE,
)
log = logging.getLogger(__name__)
......@@ -42,6 +43,10 @@ app.add_middleware(
allow_headers=["*"],
)
# setting device type for whisper model
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
log.info(f"whisper_device_type: {whisper_device_type}")
@app.post("/transcribe")
def transcribe(
......@@ -66,7 +71,7 @@ def transcribe(
model = WhisperModel(
WHISPER_MODEL,
device="auto",
device=whisper_device_type,
compute_type="int8",
download_root=WHISPER_MODEL_DIR,
)
......
......@@ -215,7 +215,8 @@ async def get_ollama_versions(url_idx: Optional[int] = None):
if len(responses) > 0:
lowest_version = min(
responses, key=lambda x: tuple(map(int, x["version"].split(".")))
responses,
key=lambda x: tuple(map(int, x["version"].split("-")[0].split("."))),
)
return {"version": lowest_version["version"]}
......
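The added `split("-")[0]` matters when an Ollama instance reports a pre-release version: without it, `int()` would fail on a component like `29-rc2`. A rough shell illustration of just that suffix-stripping step (the version string is hypothetical):

```bash
version="0.1.29-rc2"
# Drop everything from the first "-" onward before comparing the numeric parts
echo "${version%%-*}"   # prints 0.1.29
```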
......@@ -58,8 +58,8 @@ from config import (
UPLOAD_DIR,
DOCS_DIR,
RAG_EMBEDDING_MODEL,
RAG_EMBEDDING_MODEL_DEVICE_TYPE,
RAG_EMBEDDING_MODEL_AUTO_UPDATE,
DEVICE_TYPE,
CHROMA_CLIENT,
CHUNK_SIZE,
CHUNK_OVERLAP,
......@@ -86,7 +86,7 @@ app.state.TOP_K = 4
app.state.sentence_transformer_ef = (
embedding_functions.SentenceTransformerEmbeddingFunction(
model_name=app.state.RAG_EMBEDDING_MODEL_PATH,
device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
device=DEVICE_TYPE,
)
)
......@@ -154,7 +154,7 @@ async def update_embedding_model(
app.state.sentence_transformer_ef = (
embedding_functions.SentenceTransformerEmbeddingFunction(
model_name=app.state.RAG_EMBEDDING_MODEL_PATH,
device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
device=DEVICE_TYPE,
)
)
except Exception as e:
......@@ -471,25 +471,11 @@ def store_doc(
log.info(f"file.content_type: {file.content_type}")
try:
is_valid_filename = True
unsanitized_filename = file.filename
if re.search(r'[\\/:"\*\?<>|\n\t ]', unsanitized_filename) is not None:
is_valid_filename = False
filename = os.path.basename(unsanitized_filename)
unvalidated_file_path = f"{UPLOAD_DIR}/{unsanitized_filename}"
dereferenced_file_path = str(Path(unvalidated_file_path).resolve(strict=False))
if not dereferenced_file_path.startswith(UPLOAD_DIR):
is_valid_filename = False
file_path = f"{UPLOAD_DIR}/{filename}"
if is_valid_filename:
file_path = dereferenced_file_path
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(),
)
filename = file.filename
contents = file.file.read()
with open(file_path, "wb") as f:
f.write(contents)
......@@ -500,7 +486,7 @@ def store_doc(
collection_name = calculate_sha256(f)[:63]
f.close()
loader, known_type = get_loader(file.filename, file.content_type, file_path)
loader, known_type = get_loader(filename, file.content_type, file_path)
data = loader.load()
try:
......
......@@ -86,6 +86,7 @@ class SignupForm(BaseModel):
name: str
email: str
password: str
profile_image_url: Optional[str] = "/user.png"
class AuthsTable:
......@@ -94,7 +95,12 @@ class AuthsTable:
self.db.create_tables([Auth])
def insert_new_auth(
self, email: str, password: str, name: str, role: str = "pending"
self,
email: str,
password: str,
name: str,
profile_image_url: str = "/user.png",
role: str = "pending",
) -> Optional[UserModel]:
log.info("insert_new_auth")
......@@ -105,7 +111,7 @@ class AuthsTable:
)
result = Auth.create(**auth.model_dump())
user = Users.insert_new_user(id, name, email, role)
user = Users.insert_new_user(id, name, email, profile_image_url, role)
if result and user:
return user
......
......@@ -206,6 +206,18 @@ class ChatTable:
except:
return None
def get_chat_by_share_id(self, id: str) -> Optional[ChatModel]:
try:
chat = Chat.get(Chat.share_id == id)
if chat:
chat = Chat.get(Chat.id == id)
return ChatModel(**model_to_dict(chat))
else:
return None
except:
return None
def get_chat_by_id_and_user_id(self, id: str, user_id: str) -> Optional[ChatModel]:
try:
chat = Chat.get(Chat.id == id, Chat.user_id == user_id)
......
......@@ -31,7 +31,7 @@ class UserModel(BaseModel):
name: str
email: str
role: str = "pending"
profile_image_url: str = "/user.png"
profile_image_url: str
timestamp: int # timestamp in epoch
api_key: Optional[str] = None
......@@ -59,7 +59,12 @@ class UsersTable:
self.db.create_tables([User])
def insert_new_user(
self, id: str, name: str, email: str, role: str = "pending"
self,
id: str,
name: str,
email: str,
profile_image_url: str = "/user.png",
role: str = "pending",
) -> Optional[UserModel]:
user = UserModel(
**{
......@@ -67,7 +72,7 @@ class UsersTable:
"name": name,
"email": email,
"role": role,
"profile_image_url": "/user.png",
"profile_image_url": profile_image_url,
"timestamp": int(time.time()),
}
)
......
......@@ -163,7 +163,11 @@ async def signup(request: Request, form_data: SignupForm):
)
hashed = get_password_hash(form_data.password)
user = Auths.insert_new_auth(
form_data.email.lower(), hashed, form_data.name, role
form_data.email.lower(),
hashed,
form_data.name,
form_data.profile_image_url,
role,
)
if user:
......
......@@ -251,7 +251,15 @@ async def delete_shared_chat_by_id(id: str, user=Depends(get_current_user)):
@router.get("/share/{share_id}", response_model=Optional[ChatResponse])
async def get_shared_chat_by_id(share_id: str, user=Depends(get_current_user)):
chat = Chats.get_chat_by_id(share_id)
if user.role == "pending":
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.NOT_FOUND
)
if user.role == "user":
chat = Chats.get_chat_by_share_id(share_id)
elif user.role == "admin":
chat = Chats.get_chat_by_id(share_id)
if chat:
return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)})
......
from fastapi import APIRouter, UploadFile, File, BackgroundTasks
from fastapi import APIRouter, UploadFile, File, Response
from fastapi import Depends, HTTPException, status
from starlette.responses import StreamingResponse, FileResponse
from pydantic import BaseModel
from fpdf import FPDF
import markdown
import requests
import os
import aiohttp
import json
from utils.utils import get_admin_user
......@@ -18,7 +13,7 @@ from utils.misc import calculate_sha256, get_gravatar_url
from config import OLLAMA_BASE_URLS, DATA_DIR, UPLOAD_DIR
from constants import ERROR_MESSAGES
from typing import List
router = APIRouter()
......@@ -41,6 +36,59 @@ async def get_html_from_markdown(
return {"html": markdown.markdown(form_data.md)}
class ChatForm(BaseModel):
title: str
messages: List[dict]
@router.post("/pdf")
async def download_chat_as_pdf(
form_data: ChatForm,
):
pdf = FPDF()
pdf.add_page()
STATIC_DIR = "./static"
FONTS_DIR = f"{STATIC_DIR}/fonts"
pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf")
pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf")
pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf")
pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf")
pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf")
pdf.set_font("NotoSans", size=12)
pdf.set_fallback_fonts(["NotoSansKR", "NotoSansJP"])
pdf.set_auto_page_break(auto=True, margin=15)
# Adjust the effective page width for multi_cell
effective_page_width = (
pdf.w - 2 * pdf.l_margin - 10
) # Subtracted an additional 10 for extra padding
# Add chat messages
for message in form_data.messages:
role = message["role"]
content = message["content"]
pdf.set_font("NotoSans", "B", size=14) # Bold for the role
pdf.multi_cell(effective_page_width, 10, f"{role.upper()}", 0, "L")
pdf.ln(1) # Extra space between messages
pdf.set_font("NotoSans", size=10) # Regular for content
pdf.multi_cell(effective_page_width, 6, content, 0, "L")
pdf.ln(1.5) # Extra space between messages
# Save the pdf with name .pdf
pdf_bytes = pdf.output()
return Response(
content=bytes(pdf_bytes),
media_type="application/pdf",
headers={"Content-Disposition": f"attachment;filename=chat.pdf"},
)
@router.get("/db/download")
async def download_db(user=Depends(get_admin_user)):
......
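Once the backend is running, the new `/pdf` route can be exercised with a plain HTTP call. A minimal sketch, assuming this router is mounted under `/api/v1/utils` (the prefix is an assumption, not shown in this diff):

```bash
# Request a PDF rendering of a two-message chat and save it locally
curl -X POST http://localhost:8080/api/v1/utils/pdf \
  -H "Content-Type: application/json" \
  -d '{"title": "Demo chat", "messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}]}' \
  -o chat.pdf
```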
......@@ -257,6 +257,7 @@ OLLAMA_API_BASE_URL = os.environ.get(
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "")
K8S_FLAG = os.environ.get("K8S_FLAG", "")
USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false")
if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
OLLAMA_BASE_URL = (
......@@ -266,9 +267,13 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
)
if ENV == "prod":
if OLLAMA_BASE_URL == "/ollama":
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
if OLLAMA_BASE_URL == "/ollama" and not K8S_FLAG:
if USE_OLLAMA_DOCKER.lower() == "true":
# if you use the all-in-one docker container (Open WebUI + Ollama) built
# with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true"), Ollama is only reachable at http://localhost:11434
OLLAMA_BASE_URL = "http://localhost:11434"
else:
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
elif K8S_FLAG:
OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434"
......@@ -391,13 +396,21 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
# this uses the model defined in the Dockerfile ENV variable. If you don't use Docker or a Docker-based deployment such as k8s, the default embedding model (all-MiniLM-L6-v2) will be used
RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
# device type for embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing the right one can lead to better performance
RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get(
"RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu"
)
log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"),
RAG_EMBEDDING_MODEL_AUTO_UPDATE = False
if os.environ.get("RAG_EMBEDDING_MODEL_AUTO_UPDATE", "").lower() == "true":
RAG_EMBEDDING_MODEL_AUTO_UPDATE = True
# device type for embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing the right one can lead to better performance
USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false")
if USE_CUDA.lower() == "true":
DEVICE_TYPE = "cuda"
else:
DEVICE_TYPE = "cpu"
CHROMA_CLIENT = chromadb.PersistentClient(
path=CHROMA_DATA_PATH,
settings=Settings(allow_reset=True, anonymized_telemetry=False),
......
......@@ -42,6 +42,8 @@ xlrd
opencv-python-headless
rapidocr-onnxruntime
fpdf2
faster-whisper
PyJWT
......
......@@ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key
PORT="${PORT:-8080}"
if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then
echo No WEBUI_SECRET_KEY provided
echo "No WEBUI_SECRET_KEY provided"
if ! [ -e "$KEY_FILE" ]; then
echo Generating WEBUI_SECRET_KEY
echo "Generating WEBUI_SECRET_KEY"
# Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one.
echo $(head -c 12 /dev/random | base64) > $KEY_FILE
echo $(head -c 12 /dev/random | base64) > "$KEY_FILE"
fi
echo Loading WEBUI_SECRET_KEY from $KEY_FILE
WEBUI_SECRET_KEY=`cat $KEY_FILE`
echo "Loading WEBUI_SECRET_KEY from $KEY_FILE"
WEBUI_SECRET_KEY=$(cat "$KEY_FILE")
fi
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
\ No newline at end of file
if [ "$USE_OLLAMA_DOCKER" = "true" ]; then
echo "USE_OLLAMA is set to true, starting ollama serve."
ollama serve &
fi
if [ "$USE_CUDA_DOCKER" = "true" ]; then
echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries."
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib"
fi
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'