Unverified Commit 506a0613 authored by lainedfles, committed by GitHub

Merge branch 'dev' into embedding-model-fix-and-manual-update

parents ec530ac9 1a2971ae
@@ -9,4 +9,8 @@ OPENAI_API_KEY=''
# DO NOT TRACK
SCARF_NO_ANALYTICS=true
DO_NOT_TRACK=true
# Use locally bundled version of the LiteLLM cost map json
# to avoid repetitive startup connections
LITELLM_LOCAL_MODEL_COST_MAP="True"
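If you run the backend outside Docker, the same setting can be exported in the shell before launching the server (a minimal sketch; it mirrors the variable added above):

```bash
# Keep LiteLLM from fetching its model cost map over the network on every startup
export LITELLM_LOCAL_MODEL_COST_MAP="True"
```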
@@ -57,3 +57,14 @@ jobs:
path: .
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Trigger Docker build workflow
uses: actions/github-script@v7
with:
script: |
github.rest.actions.createWorkflowDispatch({
owner: context.repo.owner,
repo: context.repo.repo,
workflow_id: 'docker-build.yaml',
ref: 'v${{ steps.get_version.outputs.version }}',
})
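For reference, the same dispatch can also be triggered by hand with the GitHub CLI (a hedged example; the tag is illustrative and must point at an existing ref):

```bash
# Manually dispatch the Docker build workflow for a given release tag
gh workflow run docker-build.yaml --ref v0.1.100
```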
# name: Create and publish Docker images with specific build args
name: Create and publish a Docker image
# Configures this workflow to run every time a change is pushed to the branch called `release`.
on:
workflow_dispatch:
push:
branches:
- main
@@ -23,7 +23,7 @@ jobs:
permissions:
contents: read
packages: write
#
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -41,12 +41,11 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata for Docker images (default latest tag)
id: meta-latest
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=tag
@@ -56,11 +55,29 @@ jobs:
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
- name: Build and push Docker image (latest)
uses: docker/build-push-action@v5
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta-latest.outputs.tags }}
labels: ${{ steps.meta-latest.outputs.labels }}
- name: Build and push Docker image with CUDA
uses: docker/build-push-action@v5
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cuda
build-args: USE_CUDA=true
- name: Build and push Docker image with Ollama
uses: docker/build-push-action@v5
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:ollama
build-args: USE_OLLAMA=true
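Once this workflow has run, the variant images should be pullable by tag (assuming `REGISTRY`/`IMAGE_NAME` resolve to `ghcr.io/open-webui/open-webui`, as the README below suggests):

```bash
# Pull the CUDA- and Ollama-enabled variants published by the steps above
docker pull ghcr.io/open-webui/open-webui:cuda
docker pull ghcr.io/open-webui/open-webui:ollama
```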
# syntax=docker/dockerfile:1
# Initialize device type args
# use build args in the docker build command with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_OLLAMA=false
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
ARG USE_CUDA_VER=cu121
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you change the default model (all-MiniLM-L6-v2), you will not be able to use RAG Chat with documents already embedded in the WebUI; you need to re-embed them.
ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2
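Taken together, a build that overrides these defaults might look like the following (a sketch; the tag and argument values are illustrative):

```bash
# Example: bundle Ollama and build against a CUDA 11 torch wheel
docker build \
  --build-arg="USE_OLLAMA=true" \
  --build-arg="USE_CUDA=true" \
  --build-arg="USE_CUDA_VER=cu117" \
  -t open-webui .
```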
######## WebUI frontend ########
FROM node:21-alpine3.19 as build
WORKDIR /app
COPY package.json package-lock.json ./
RUN npm ci
COPY . .
RUN npm run build
######## WebUI backend ########
FROM python:3.11-slim-bookworm as base
# Use args
ARG USE_CUDA
ARG USE_OLLAMA
ARG USE_CUDA_VER
ARG USE_EMBEDDING_MODEL
## Basis ##
ENV ENV=prod \
PORT=8080 \
# pass build args to the build
USE_OLLAMA_DOCKER=${USE_OLLAMA} \
USE_CUDA_DOCKER=${USE_CUDA} \
USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL}
## Basis URL Config ##
ENV OLLAMA_BASE_URL="/ollama" \
OPENAI_API_BASE_URL=""
## API Key and Security Config ##
ENV OPENAI_API_KEY="" \
WEBUI_SECRET_KEY="" \
SCARF_NO_ANALYTICS=true \
DO_NOT_TRACK=true
# Use locally bundled version of the LiteLLM cost map json
# to avoid repetitive startup connections
ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
#### Other models #########################################################
## whisper TTS model settings ##
ENV WHISPER_MODEL="base" \
WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
## RAG Embedding model settings ##
ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
#### Other models ##########################################################
WORKDIR /app/backend
# install python dependencies
COPY ./backend/requirements.txt ./requirements.txt
RUN if [ "$USE_CUDA" = "true" ]; then \
# If you use CUDA the whisper and embedding model will be downloaded on first use
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
pip3 install -r requirements.txt --no-cache-dir && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
else \
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
pip3 install -r requirements.txt --no-cache-dir && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
fi
RUN if [ "$USE_OLLAMA" = "true" ]; then \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# install helper tools
apt-get install -y --no-install-recommends curl && \
# install ollama
curl -fsSL https://ollama.com/install.sh | sh && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
else \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
fi
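If the image was built with `USE_OLLAMA=true` (for example tagged `open-webui` as in the sketch above), a quick sanity check that the bundled binary landed on the PATH is to run it as a one-off command (assuming no ENTRYPOINT overrides it, as in this Dockerfile):

```bash
# Print the version of the Ollama binary installed by the script above
docker run --rm open-webui ollama --version
```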
# copy embedding weight from build
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
# copy built frontend files
COPY --from=build /app/build /app/build
@@ -82,4 +120,6 @@ COPY --from=build /app/package.json /app/package.json
# copy backend files
COPY ./backend .
EXPOSE 8080
CMD [ "bash", "start.sh"]
@@ -113,6 +113,65 @@ Don't forget to explore our sibling project, [Open WebUI Community](https://open
- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
- **If you want to customize your build with additional args**, use these commands:
> [!NOTE]
> If you only want to use Open WebUI with Ollama included or with CUDA acceleration, it's recommended to use our official images with the `:ollama` or `:cuda` tags.
> If you want a combination of both, or more customization options such as a different embedding model and/or CUDA version, you need to build the image yourself following the instructions below.
**For the build:**
```bash
docker build -t open-webui .
```
Optional build args (add them to the `docker build` command above as needed), e.g.:
```bash
--build-arg="USE_EMBEDDING_MODEL=intfloat/multilingual-e5-large"
```
For "intfloat/multilingual-e5-large" custom embedding model (default is all-MiniLM-L6-v2), only works with [sentence transforer models](https://huggingface.co/models?library=sentence-transformers). Current [Leaderbord](https://huggingface.co/spaces/mteb/leaderboard) of embedding models.
```bash
--build-arg="USE_OLLAMA=true"
```
Bundles Ollama inside the image.
```bash
--build-arg="USE_CUDA=true"
```
Enables CUDA acceleration for the embedding and Whisper models.
> [!NOTE]
> You need to install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your machine so Docker can expose CUDA devices to the container. This only works on Linux; use WSL2 on Windows!
```bash
--build-arg="USE_CUDA_VER=cu117"
```
Selects CUDA 11 (the default is CUDA 12, `cu121`).
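Putting it together, a custom build that swaps the embedding model and enables CUDA might look like this (values are illustrative):

```bash
docker build \
  --build-arg="USE_CUDA=true" \
  --build-arg="USE_EMBEDDING_MODEL=intfloat/multilingual-e5-large" \
  -t open-webui .
```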
**To run the image:**
- **If you DID NOT use the USE_CUDA=true build ARG**, use this command:
```bash
docker run -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always open-webui
```
- **If you DID use the USE_CUDA=true build ARG**, use this command:
```bash
docker run --gpus all -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always open-webui
```
- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
#### Open WebUI: Server Connection Error
If you're experiencing connection issues, it's often due to the WebUI docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) inside the container. Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, resulting in the link: `http://localhost:8080`.
...
@@ -28,6 +28,7 @@ from config import (
UPLOAD_DIR,
WHISPER_MODEL,
WHISPER_MODEL_DIR,
DEVICE_TYPE,
)
log = logging.getLogger(__name__)
@@ -42,6 +43,10 @@ app.add_middleware(
allow_headers=["*"],
)
# setting device type for whisper model
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
log.info(f"whisper_device_type: {whisper_device_type}")
@app.post("/transcribe")
def transcribe(
@@ -66,7 +71,7 @@ def transcribe(
model = WhisperModel(
WHISPER_MODEL,
device=whisper_device_type,
compute_type="int8",
download_root=WHISPER_MODEL_DIR,
)
...
@@ -215,7 +215,8 @@ async def get_ollama_versions(url_idx: Optional[int] = None):
if len(responses) > 0:
lowest_version = min(
responses,
key=lambda x: tuple(map(int, x["version"].split("-")[0].split("."))),
)
return {"version": lowest_version["version"]}
...
@@ -58,8 +58,8 @@ from config import (
UPLOAD_DIR,
DOCS_DIR,
RAG_EMBEDDING_MODEL,
RAG_EMBEDDING_MODEL_AUTO_UPDATE,
DEVICE_TYPE,
CHROMA_CLIENT,
CHUNK_SIZE,
CHUNK_OVERLAP,
@@ -86,7 +86,7 @@ app.state.TOP_K = 4
app.state.sentence_transformer_ef = (
embedding_functions.SentenceTransformerEmbeddingFunction(
model_name=app.state.RAG_EMBEDDING_MODEL_PATH,
device=DEVICE_TYPE,
)
)
@@ -154,7 +154,7 @@ async def update_embedding_model(
app.state.sentence_transformer_ef = (
embedding_functions.SentenceTransformerEmbeddingFunction(
model_name=app.state.RAG_EMBEDDING_MODEL_PATH,
device=DEVICE_TYPE,
)
)
except Exception as e:
@@ -471,25 +471,11 @@ def store_doc(
log.info(f"file.content_type: {file.content_type}")
try:
unsanitized_filename = file.filename
filename = os.path.basename(unsanitized_filename)
file_path = f"{UPLOAD_DIR}/{filename}"
contents = file.file.read()
with open(file_path, "wb") as f:
f.write(contents)
@@ -500,7 +486,7 @@ def store_doc(
collection_name = calculate_sha256(f)[:63]
f.close()
loader, known_type = get_loader(filename, file.content_type, file_path)
data = loader.load()
try:
...
@@ -86,6 +86,7 @@ class SignupForm(BaseModel):
name: str
email: str
password: str
profile_image_url: Optional[str] = "/user.png"
class AuthsTable:
@@ -94,7 +95,12 @@ class AuthsTable:
self.db.create_tables([Auth])
def insert_new_auth(
self,
email: str,
password: str,
name: str,
profile_image_url: str = "/user.png",
role: str = "pending",
) -> Optional[UserModel]:
log.info("insert_new_auth")
@@ -105,7 +111,7 @@ class AuthsTable:
)
result = Auth.create(**auth.model_dump())
user = Users.insert_new_user(id, name, email, profile_image_url, role)
if result and user:
return user
...
@@ -206,6 +206,18 @@ class ChatTable:
except:
return None
def get_chat_by_share_id(self, id: str) -> Optional[ChatModel]:
try:
chat = Chat.get(Chat.share_id == id)
if chat:
chat = Chat.get(Chat.id == id)
return ChatModel(**model_to_dict(chat))
else:
return None
except:
return None
def get_chat_by_id_and_user_id(self, id: str, user_id: str) -> Optional[ChatModel]:
try:
chat = Chat.get(Chat.id == id, Chat.user_id == user_id)
...
@@ -31,7 +31,7 @@ class UserModel(BaseModel):
name: str
email: str
role: str = "pending"
profile_image_url: str
timestamp: int  # timestamp in epoch
api_key: Optional[str] = None
@@ -59,7 +59,12 @@ class UsersTable:
self.db.create_tables([User])
def insert_new_user(
self,
id: str,
name: str,
email: str,
profile_image_url: str = "/user.png",
role: str = "pending",
) -> Optional[UserModel]:
user = UserModel(
**{
@@ -67,7 +72,7 @@ class UsersTable:
"name": name,
"email": email,
"role": role,
"profile_image_url": profile_image_url,
"timestamp": int(time.time()),
}
)
...
@@ -163,7 +163,11 @@ async def signup(request: Request, form_data: SignupForm):
)
hashed = get_password_hash(form_data.password)
user = Auths.insert_new_auth(
form_data.email.lower(),
hashed,
form_data.name,
form_data.profile_image_url,
role,
)
if user:
...
@@ -251,7 +251,15 @@ async def delete_shared_chat_by_id(id: str, user=Depends(get_current_user)):
@router.get("/share/{share_id}", response_model=Optional[ChatResponse])
async def get_shared_chat_by_id(share_id: str, user=Depends(get_current_user)):
if user.role == "pending":
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.NOT_FOUND
)
if user.role == "user":
chat = Chats.get_chat_by_share_id(share_id)
elif user.role == "admin":
chat = Chats.get_chat_by_id(share_id)
if chat:
return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)})
...
from fastapi import APIRouter, UploadFile, File, Response
from fastapi import Depends, HTTPException, status
from starlette.responses import StreamingResponse, FileResponse
from pydantic import BaseModel
from fpdf import FPDF
import markdown
from utils.utils import get_admin_user
@@ -18,7 +13,7 @@ from utils.misc import calculate_sha256, get_gravatar_url
from config import OLLAMA_BASE_URLS, DATA_DIR, UPLOAD_DIR
from constants import ERROR_MESSAGES
from typing import List
router = APIRouter()
@@ -41,6 +36,59 @@ async def get_html_from_markdown(
return {"html": markdown.markdown(form_data.md)}
class ChatForm(BaseModel):
title: str
messages: List[dict]
@router.post("/pdf")
async def download_chat_as_pdf(
form_data: ChatForm,
):
pdf = FPDF()
pdf.add_page()
STATIC_DIR = "./static"
FONTS_DIR = f"{STATIC_DIR}/fonts"
pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf")
pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf")
pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf")
pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf")
pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf")
pdf.set_font("NotoSans", size=12)
pdf.set_fallback_fonts(["NotoSansKR", "NotoSansJP"])
pdf.set_auto_page_break(auto=True, margin=15)
# Adjust the effective page width for multi_cell
effective_page_width = (
pdf.w - 2 * pdf.l_margin - 10
) # Subtracted an additional 10 for extra padding
# Add chat messages
for message in form_data.messages:
role = message["role"]
content = message["content"]
pdf.set_font("NotoSans", "B", size=14) # Bold for the role
pdf.multi_cell(effective_page_width, 10, f"{role.upper()}", 0, "L")
pdf.ln(1) # Extra space between messages
pdf.set_font("NotoSans", size=10) # Regular for content
pdf.multi_cell(effective_page_width, 6, content, 0, "L")
pdf.ln(1.5) # Extra space between messages
# Save the pdf with name .pdf
pdf_bytes = pdf.output()
return Response(
content=bytes(pdf_bytes),
media_type="application/pdf",
headers={"Content-Disposition": f"attachment;filename=chat.pdf"},
)
@router.get("/db/download") @router.get("/db/download")
async def download_db(user=Depends(get_admin_user)): async def download_db(user=Depends(get_admin_user)):
......
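For a quick manual test of the new endpoint, a request like the following should return a PDF (a sketch; the `/api/v1/utils` prefix is an assumption about where this router is mounted, which isn't shown in this diff, so adjust the path to your deployment):

```bash
# Hypothetical path prefix; the router itself only defines POST /pdf
curl -X POST "http://localhost:8080/api/v1/utils/pdf" \
  -H "Content-Type: application/json" \
  -d '{"title": "Demo chat", "messages": [{"role": "user", "content": "Hello"}]}' \
  --output chat.pdf
```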
@@ -257,6 +257,7 @@ OLLAMA_API_BASE_URL = os.environ.get(
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "")
K8S_FLAG = os.environ.get("K8S_FLAG", "")
USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false")
if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
OLLAMA_BASE_URL = (
@@ -266,9 +267,13 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
)
if ENV == "prod":
if OLLAMA_BASE_URL == "/ollama" and not K8S_FLAG:
if USE_OLLAMA_DOCKER.lower() == "true":
# if you use the all-in-one docker container (Open WebUI + Ollama)
# built with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true"), this only works with http://localhost:11434
OLLAMA_BASE_URL = "http://localhost:11434"
else:
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
elif K8S_FLAG:
OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434"
@@ -391,13 +396,21 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
# this uses the model defined in the Dockerfile ENV variable. If you don't use docker or docker-based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2)
RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}")
RAG_EMBEDDING_MODEL_AUTO_UPDATE = False
if os.environ.get("RAG_EMBEDDING_MODEL_AUTO_UPDATE", "").lower() == "true":
RAG_EMBEDDING_MODEL_AUTO_UPDATE = True
# device type for embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing the right device can lead to better performance
USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false")
if USE_CUDA.lower() == "true":
DEVICE_TYPE = "cuda"
else:
DEVICE_TYPE = "cpu"
CHROMA_CLIENT = chromadb.PersistentClient(
path=CHROMA_DATA_PATH,
settings=Settings(allow_reset=True, anonymized_telemetry=False),
...
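Because these flags are read as plain environment variables, they can also be set when the container starts rather than only at build time (a hedged example; `USE_CUDA_DOCKER=true` only helps if the image actually contains a CUDA-enabled torch build):

```bash
# Run a CUDA-capable image with GPU access and the CUDA code path enabled
docker run --gpus all -d -p 3000:8080 \
  -e USE_CUDA_DOCKER=true \
  -v open-webui:/app/backend/data \
  --name open-webui --restart always open-webui
```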
@@ -42,6 +42,8 @@ xlrd
opencv-python-headless
rapidocr-onnxruntime
fpdf2
faster-whisper
PyJWT
...
@@ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key
PORT="${PORT:-8080}"
if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then
echo "No WEBUI_SECRET_KEY provided"
if ! [ -e "$KEY_FILE" ]; then
echo "Generating WEBUI_SECRET_KEY"
# Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one.
echo $(head -c 12 /dev/random | base64) > "$KEY_FILE"
fi
echo "Loading WEBUI_SECRET_KEY from $KEY_FILE"
WEBUI_SECRET_KEY=$(cat "$KEY_FILE")
fi
if [ "$USE_OLLAMA_DOCKER" = "true" ]; then
echo "USE_OLLAMA is set to true, starting ollama serve."
ollama serve &
fi
if [ "$USE_CUDA_DOCKER" = "true" ]; then
echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries."
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib"
fi
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'