Unverified Commit 78284e49 authored by Timothy Jaeryang Baek's avatar Timothy Jaeryang Baek Committed by GitHub
Browse files

Merge pull request #1488 from open-webui/dev

0.1.118
parents 331fe04d 64a6db4b
......@@ -57,3 +57,14 @@ jobs:
path: .
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Trigger Docker build workflow
uses: actions/github-script@v7
with:
script: |
github.rest.actions.createWorkflowDispatch({
owner: context.repo.owner,
repo: context.repo.repo,
workflow_id: 'docker-build.yaml',
ref: 'v${{ steps.get_version.outputs.version }}',
})
#
name: Create and publish a Docker image
name: Create and publish Docker images with specific build args
# Configures this workflow to run every time a change is pushed to the branch called `release`.
on:
workflow_dispatch:
push:
branches:
- main
......@@ -10,30 +9,39 @@ on:
tags:
- v*
# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds.
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
FULL_IMAGE_NAME: ghcr.io/${{ github.repository }}
# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
jobs:
build-and-push-image:
build-main-image:
runs-on: ubuntu-latest
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
contents: read
packages: write
#
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout repository
uses: actions/checkout@v4
# Required for multi architecture build
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
# Required for multi architecture build
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
# Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
......@@ -41,12 +49,11 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata for Docker images
- name: Extract metadata for Docker images (default latest tag)
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
# This configuration dynamically generates tags based on the branch, tag, commit, and custom suffix for lite version.
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=tag
......@@ -56,11 +63,322 @@ jobs:
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
- name: Build and push Docker image
- name: Build Docker image (latest)
uses: docker/build-push-action@v5
id: build
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
platforms: ${{ matrix.platform }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-main-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
build-cuda-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata for Docker images (default latest tag)
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=tag
type=sha,prefix=git-
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=cuda
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
suffix=-cuda,onlatest=true
- name: Build Docker image (cuda)
uses: docker/build-push-action@v5
id: build
with:
context: .
push: true
platforms: ${{ matrix.platform }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=gha
cache-to: type=gha,mode=max
build-args: USE_CUDA=true
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-cuda-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
build-ollama-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata for Docker images (ollama tag)
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=tag
type=sha,prefix=git-
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=ollama
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
suffix=-ollama,onlatest=true
- name: Build Docker image (ollama)
uses: docker/build-push-action@v5
id: build
with:
context: .
push: true
platforms: ${{ matrix.platform }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=gha
cache-to: type=gha,mode=max
build-args: USE_OLLAMA=true
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-ollama-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge-main-images:
runs-on: ubuntu-latest
needs: [ build-main-image ]
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
pattern: digests-main-*
path: /tmp/digests
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata for Docker images (default latest tag)
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=tag
type=sha,prefix=git-
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
merge-cuda-images:
runs-on: ubuntu-latest
needs: [ build-cuda-image ]
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
pattern: digests-cuda-*
path: /tmp/digests
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata for Docker images (default latest tag)
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=tag
type=sha,prefix=git-
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=cuda
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
suffix=-cuda,onlatest=true
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
merge-ollama-images:
runs-on: ubuntu-latest
needs: [ build-ollama-image ]
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
pattern: digests-ollama-*
path: /tmp/digests
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata for Docker images (default ollama tag)
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.FULL_IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=tag
type=sha,prefix=git-
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=ollama
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
suffix=-ollama,onlatest=true
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
......@@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.1.118] - 2024-04-10
### Added
- **🦙 Ollama and CUDA Images**: Added support for `:ollama` and `:cuda` tagged images.
- **👍 Enhanced Response Rating**: Now you can annotate your ratings for better feedback.
- **👤 User Initials Profile Photo**: User initials are now the default profile photo.
- **🔍 Update RAG Embedding Model**: Customize RAG embedding model directly in document settings.
- **🌍 Additional Language Support**: Added Turkish language support.
### Fixed
- **🔒 Share Chat Permission**: Resolved issue with chat sharing permissions.
- **🛠 Modal Close**: Modals can now be closed using the Esc key.
### Changed
- **🎨 Admin Panel Styling**: Refreshed styling for the admin panel.
- **🐳 Docker Image Build**: Updated docker image build process for improved efficiency.
## [0.1.117] - 2024-04-03
### Added
......
# syntax=docker/dockerfile:1
# Initialize device type args
# use build args in the docker build commmand with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_OLLAMA=false
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
ARG USE_CUDA_VER=cu121
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better performance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2
FROM node:alpine as build
######## WebUI frontend ########
FROM --platform=$BUILDPLATFORM node:21-alpine3.19 as build
WORKDIR /app
# wget embedding model weight from alpine (does not exist from slim-buster)
RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
tar -xzf - -C /app
COPY package.json package-lock.json ./
RUN npm ci
COPY . .
RUN npm run build
######## WebUI backend ########
FROM python:3.11-slim-bookworm as base
ENV ENV=prod
ENV PORT ""
ENV OLLAMA_BASE_URL "/ollama"
ENV OPENAI_API_BASE_URL ""
ENV OPENAI_API_KEY ""
ENV WEBUI_SECRET_KEY ""
ENV WEBUI_AUTH_TRUSTED_EMAIL_HEADER ""
ENV SCARF_NO_ANALYTICS true
ENV DO_NOT_TRACK true
# Use args
ARG USE_CUDA
ARG USE_OLLAMA
ARG USE_CUDA_VER
ARG USE_EMBEDDING_MODEL
## Basis ##
ENV ENV=prod \
PORT=8080 \
# pass build args to the build
USE_OLLAMA_DOCKER=${USE_OLLAMA} \
USE_CUDA_DOCKER=${USE_CUDA} \
USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL}
## Basis URL Config ##
ENV OLLAMA_BASE_URL="/ollama" \
OPENAI_API_BASE_URL=""
## API Key and Security Config ##
ENV OPENAI_API_KEY="" \
WEBUI_SECRET_KEY="" \
SCARF_NO_ANALYTICS=true \
DO_NOT_TRACK=true
# Use locally bundled version of the LiteLLM cost map json
# to avoid repetitive startup connections
ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
######## Preloaded models ########
# whisper TTS Settings
ENV WHISPER_MODEL="base"
ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
# RAG Embedding Model Settings
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
# for better persormance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2"
# device type for whisper tts and embbeding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance
ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu"
ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models"
ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR
#### Other models #########################################################
## whisper TTS model settings ##
ENV WHISPER_MODEL="base" \
WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
######## Preloaded models ########
## RAG Embedding model settings ##
ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
#### Other models ##########################################################
WORKDIR /app/backend
RUN if [ "$USE_OLLAMA" = "true" ]; then \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# install helper tools
apt-get install -y --no-install-recommends curl && \
# install ollama
curl -fsSL https://ollama.com/install.sh | sh && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
else \
apt-get update && \
# Install pandoc and netcat
apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
# for RAG OCR
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
# cleanup
rm -rf /var/lib/apt/lists/*; \
fi
# install python dependencies
COPY ./backend/requirements.txt ./requirements.txt
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
RUN pip3 install -r requirements.txt --no-cache-dir
RUN if [ "$USE_CUDA" = "true" ]; then \
# If you use CUDA the whisper and embedding model will be downloaded on first use
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
pip3 install -r requirements.txt --no-cache-dir && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
else \
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
pip3 install -r requirements.txt --no-cache-dir && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
fi
# Install pandoc and netcat
# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
RUN apt-get update \
&& apt-get install -y pandoc netcat-openbsd \
&& rm -rf /var/lib/apt/lists/*
# preload embedding model
RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['RAG_EMBEDDING_MODEL_DEVICE_TYPE'])"
# preload tts model
RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='auto', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"
# copy embedding weight from build
RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
# copy built frontend files
COPY --from=build /app/build /app/build
......@@ -86,4 +120,6 @@ COPY --from=build /app/package.json /app/package.json
# copy backend files
COPY ./backend .
CMD [ "bash", "start.sh"]
EXPOSE 8080
CMD [ "bash", "start.sh"]
\ No newline at end of file
......@@ -94,24 +94,27 @@ Don't forget to explore our sibling project, [Open WebUI Community](https://open
### Quick Start with Docker 🐳
> [!IMPORTANT]
> [!WARNING]
> When using Docker to install Open WebUI, make sure to include the `-v open-webui:/app/backend/data` in your Docker command. This step is crucial as it ensures your database is properly mounted and prevents any loss of data.
- **If Ollama is on your computer**, use this command:
> [!TIP]
> If you wish to utilize Open WebUI with Ollama included or CUDA acceleration, we recommend utilizing our official images tagged with either `:cuda` or `:ollama`. To enable CUDA, you must install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your Linux/WSL system.
```bash
docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
**If Ollama is on your computer**, use this command:
- **If Ollama is on a Different Server**, use this command:
```bash
docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
**If Ollama is on a Different Server**, use this command:
- To connect to Ollama on another server, change the `OLLAMA_BASE_URL` to the server's URL:
To connect to Ollama on another server, change the `OLLAMA_BASE_URL` to the server's URL:
```bash
docker run -d -p 3000:8080 -e OLLAMA_BASE_URL=https://example.com -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
```bash
docker run -d -p 3000:8080 -e OLLAMA_BASE_URL=https://example.com -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
#### Open WebUI: Server Connection Error
......
......@@ -28,6 +28,7 @@ from config import (
UPLOAD_DIR,
WHISPER_MODEL,
WHISPER_MODEL_DIR,
DEVICE_TYPE,
)
log = logging.getLogger(__name__)
......@@ -42,6 +43,10 @@ app.add_middleware(
allow_headers=["*"],
)
# setting device type for whisper model
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
log.info(f"whisper_device_type: {whisper_device_type}")
@app.post("/transcribe")
def transcribe(
......@@ -66,7 +71,7 @@ def transcribe(
model = WhisperModel(
WHISPER_MODEL,
device="auto",
device=whisper_device_type,
compute_type="int8",
download_root=WHISPER_MODEL_DIR,
)
......
......@@ -215,7 +215,8 @@ async def get_ollama_versions(url_idx: Optional[int] = None):
if len(responses) > 0:
lowest_version = min(
responses, key=lambda x: tuple(map(int, x["version"].split(".")))
responses,
key=lambda x: tuple(map(int, x["version"].split("-")[0].split("."))),
)
return {"version": lowest_version["version"]}
......
......@@ -13,8 +13,8 @@ import os, shutil, logging, re
from pathlib import Path
from typing import List
from sentence_transformers import SentenceTransformer
from chromadb.utils import embedding_functions
from chromadb.utils.batch_utils import create_batches
from langchain_community.document_loaders import (
WebBaseLoader,
......@@ -45,7 +45,7 @@ from apps.web.models.documents import (
DocumentResponse,
)
from apps.rag.utils import query_doc, query_collection
from apps.rag.utils import query_doc, query_collection, get_embedding_model_path
from utils.misc import (
calculate_sha256,
......@@ -59,7 +59,8 @@ from config import (
UPLOAD_DIR,
DOCS_DIR,
RAG_EMBEDDING_MODEL,
RAG_EMBEDDING_MODEL_DEVICE_TYPE,
RAG_EMBEDDING_MODEL_AUTO_UPDATE,
DEVICE_TYPE,
CHROMA_CLIENT,
CHUNK_SIZE,
CHUNK_OVERLAP,
......@@ -71,28 +72,25 @@ from constants import ERROR_MESSAGES
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
#
# if RAG_EMBEDDING_MODEL:
# sentence_transformer_ef = SentenceTransformer(
# model_name_or_path=RAG_EMBEDDING_MODEL,
# cache_folder=RAG_EMBEDDING_MODEL_DIR,
# device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
# )
app = FastAPI()
app.state.PDF_EXTRACT_IMAGES = False
app.state.CHUNK_SIZE = CHUNK_SIZE
app.state.CHUNK_OVERLAP = CHUNK_OVERLAP
app.state.RAG_TEMPLATE = RAG_TEMPLATE
app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
app.state.TOP_K = 4
app.state.sentence_transformer_ef = (
embedding_functions.SentenceTransformerEmbeddingFunction(
model_name=app.state.RAG_EMBEDDING_MODEL,
device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
model_name=get_embedding_model_path(
app.state.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE
),
device=DEVICE_TYPE,
)
)
......@@ -143,18 +141,33 @@ class EmbeddingModelUpdateForm(BaseModel):
async def update_embedding_model(
form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user)
):
app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
app.state.sentence_transformer_ef = (
embedding_functions.SentenceTransformerEmbeddingFunction(
model_name=app.state.RAG_EMBEDDING_MODEL,
device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
)
log.info(
f"Updating embedding model: {app.state.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}"
)
return {
"status": True,
"embedding_model": app.state.RAG_EMBEDDING_MODEL,
}
try:
sentence_transformer_ef = (
embedding_functions.SentenceTransformerEmbeddingFunction(
model_name=get_embedding_model_path(form_data.embedding_model, True),
device=DEVICE_TYPE,
)
)
app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
app.state.sentence_transformer_ef = sentence_transformer_ef
return {
"status": True,
"embedding_model": app.state.RAG_EMBEDDING_MODEL,
}
except Exception as e:
log.exception(f"Problem updating embedding model: {e}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=ERROR_MESSAGES.DEFAULT(e),
)
@app.get("/config")
......@@ -341,9 +354,14 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b
embedding_function=app.state.sentence_transformer_ef,
)
collection.add(
documents=texts, metadatas=metadatas, ids=[str(uuid.uuid1()) for _ in texts]
)
for batch in create_batches(
api=CHROMA_CLIENT,
ids=[str(uuid.uuid1()) for _ in texts],
metadatas=metadatas,
documents=texts,
):
collection.add(*batch)
return True
except Exception as e:
log.exception(e)
......
import os
import re
import logging
from typing import List
from huggingface_hub import snapshot_download
from config import SRC_LOG_LEVELS, CHROMA_CLIENT
......@@ -188,3 +190,43 @@ def rag_messages(docs, messages, template, k, embedding_function):
messages[last_user_message_idx] = new_user_message
return messages
def get_embedding_model_path(
embedding_model: str, update_embedding_model: bool = False
):
# Construct huggingface_hub kwargs with local_files_only to return the snapshot path
cache_dir = os.getenv("SENTENCE_TRANSFORMERS_HOME")
local_files_only = not update_embedding_model
snapshot_kwargs = {
"cache_dir": cache_dir,
"local_files_only": local_files_only,
}
log.debug(f"embedding_model: {embedding_model}")
log.debug(f"snapshot_kwargs: {snapshot_kwargs}")
# Inspiration from upstream sentence_transformers
if (
os.path.exists(embedding_model)
or ("\\" in embedding_model or embedding_model.count("/") > 1)
and local_files_only
):
# If fully qualified path exists, return input, else set repo_id
return embedding_model
elif "/" not in embedding_model:
# Set valid repo_id for model short-name
embedding_model = "sentence-transformers" + "/" + embedding_model
snapshot_kwargs["repo_id"] = embedding_model
# Attempt to query the huggingface_hub library to determine the local path and/or to update
try:
embedding_model_repo_path = snapshot_download(**snapshot_kwargs)
log.debug(f"embedding_model_repo_path: {embedding_model_repo_path}")
return embedding_model_repo_path
except Exception as e:
log.exception(f"Cannot determine embedding model snapshot path: {e}")
return embedding_model
......@@ -86,6 +86,7 @@ class SignupForm(BaseModel):
name: str
email: str
password: str
profile_image_url: Optional[str] = "/user.png"
class AuthsTable:
......@@ -94,7 +95,12 @@ class AuthsTable:
self.db.create_tables([Auth])
def insert_new_auth(
self, email: str, password: str, name: str, role: str = "pending"
self,
email: str,
password: str,
name: str,
profile_image_url: str = "/user.png",
role: str = "pending",
) -> Optional[UserModel]:
log.info("insert_new_auth")
......@@ -105,7 +111,7 @@ class AuthsTable:
)
result = Auth.create(**auth.model_dump())
user = Users.insert_new_user(id, name, email, role)
user = Users.insert_new_user(id, name, email, profile_image_url, role)
if result and user:
return user
......
......@@ -206,6 +206,18 @@ class ChatTable:
except:
return None
def get_chat_by_share_id(self, id: str) -> Optional[ChatModel]:
try:
chat = Chat.get(Chat.share_id == id)
if chat:
chat = Chat.get(Chat.id == id)
return ChatModel(**model_to_dict(chat))
else:
return None
except:
return None
def get_chat_by_id_and_user_id(self, id: str, user_id: str) -> Optional[ChatModel]:
try:
chat = Chat.get(Chat.id == id, Chat.user_id == user_id)
......
......@@ -31,7 +31,7 @@ class UserModel(BaseModel):
name: str
email: str
role: str = "pending"
profile_image_url: str = "/user.png"
profile_image_url: str
timestamp: int # timestamp in epoch
api_key: Optional[str] = None
......@@ -59,7 +59,12 @@ class UsersTable:
self.db.create_tables([User])
def insert_new_user(
self, id: str, name: str, email: str, role: str = "pending"
self,
id: str,
name: str,
email: str,
profile_image_url: str = "/user.png",
role: str = "pending",
) -> Optional[UserModel]:
user = UserModel(
**{
......@@ -67,7 +72,7 @@ class UsersTable:
"name": name,
"email": email,
"role": role,
"profile_image_url": "/user.png",
"profile_image_url": profile_image_url,
"timestamp": int(time.time()),
}
)
......
......@@ -163,7 +163,11 @@ async def signup(request: Request, form_data: SignupForm):
)
hashed = get_password_hash(form_data.password)
user = Auths.insert_new_auth(
form_data.email.lower(), hashed, form_data.name, role
form_data.email.lower(),
hashed,
form_data.name,
form_data.profile_image_url,
role,
)
if user:
......
......@@ -251,7 +251,15 @@ async def delete_shared_chat_by_id(id: str, user=Depends(get_current_user)):
@router.get("/share/{share_id}", response_model=Optional[ChatResponse])
async def get_shared_chat_by_id(share_id: str, user=Depends(get_current_user)):
chat = Chats.get_chat_by_id(share_id)
if user.role == "pending":
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.NOT_FOUND
)
if user.role == "user":
chat = Chats.get_chat_by_share_id(share_id)
elif user.role == "admin":
chat = Chats.get_chat_by_id(share_id)
if chat:
return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)})
......
......@@ -28,8 +28,6 @@ except ImportError:
WEBUI_NAME = os.environ.get("WEBUI_NAME", "Open WebUI")
WEBUI_FAVICON_URL = "https://openwebui.com/favicon.png"
shutil.copyfile("../build/favicon.png", "./static/favicon.png")
####################################
# ENV (dev,test,prod)
####################################
......@@ -103,6 +101,26 @@ for version in soup.find_all("h2"):
CHANGELOG = changelog_json
####################################
# DATA/FRONTEND BUILD DIR
####################################
DATA_DIR = str(Path(os.getenv("DATA_DIR", "./data")).resolve())
FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build")))
try:
with open(f"{DATA_DIR}/config.json", "r") as f:
CONFIG_DATA = json.load(f)
except:
CONFIG_DATA = {}
####################################
# Static DIR
####################################
STATIC_DIR = str(Path(os.getenv("STATIC_DIR", "./static")).resolve())
shutil.copyfile(f"{FRONTEND_BUILD_DIR}/favicon.png", f"{STATIC_DIR}/favicon.png")
####################################
# LOGGING
......@@ -165,7 +183,7 @@ if CUSTOM_NAME:
r = requests.get(url, stream=True)
if r.status_code == 200:
with open("./static/favicon.png", "wb") as f:
with open(f"{STATIC_DIR}/favicon.png", "wb") as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
......@@ -177,18 +195,6 @@ else:
if WEBUI_NAME != "Open WebUI":
WEBUI_NAME += " (Open WebUI)"
####################################
# DATA/FRONTEND BUILD DIR
####################################
DATA_DIR = str(Path(os.getenv("DATA_DIR", "./data")).resolve())
FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build")))
try:
with open(f"{DATA_DIR}/config.json", "r") as f:
CONFIG_DATA = json.load(f)
except:
CONFIG_DATA = {}
####################################
# File Upload DIR
......@@ -257,6 +263,7 @@ OLLAMA_API_BASE_URL = os.environ.get(
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "")
K8S_FLAG = os.environ.get("K8S_FLAG", "")
USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false")
if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
OLLAMA_BASE_URL = (
......@@ -266,9 +273,13 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
)
if ENV == "prod":
if OLLAMA_BASE_URL == "/ollama":
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
if OLLAMA_BASE_URL == "/ollama" and not K8S_FLAG:
if USE_OLLAMA_DOCKER.lower() == "true":
# if you use all-in-one docker container (Open WebUI + Ollama)
# with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true") this only works with http://localhost:11434
OLLAMA_BASE_URL = "http://localhost:11434"
else:
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
elif K8S_FLAG:
OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434"
......@@ -391,10 +402,22 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
# this uses the model defined in the Dockerfile ENV variable. If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2)
RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance
RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get(
"RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu"
log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"),
RAG_EMBEDDING_MODEL_AUTO_UPDATE = (
os.environ.get("RAG_EMBEDDING_MODEL_AUTO_UPDATE", "").lower() == "true"
)
# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance
USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false")
if USE_CUDA.lower() == "true":
DEVICE_TYPE = "cuda"
else:
DEVICE_TYPE = "cpu"
CHROMA_CLIENT = chromadb.PersistentClient(
path=CHROMA_DATA_PATH,
settings=Settings(allow_reset=True, anonymized_telemetry=False),
......
......@@ -5,6 +5,7 @@ import time
import os
import sys
import logging
import aiohttp
import requests
from fastapi import FastAPI, Request, Depends, status
......@@ -18,6 +19,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
from apps.ollama.main import app as ollama_app
from apps.openai.main import app as openai_app
from apps.litellm.main import app as litellm_app, startup as litellm_app_startup
from apps.audio.main import app as audio_app
from apps.images.main import app as images_app
......@@ -38,6 +40,8 @@ from config import (
VERSION,
CHANGELOG,
FRONTEND_BUILD_DIR,
CACHE_DIR,
STATIC_DIR,
MODEL_FILTER_ENABLED,
MODEL_FILTER_LIST,
GLOBAL_LOG_LEVEL,
......@@ -269,14 +273,16 @@ async def get_app_changelog():
@app.get("/api/version/updates")
async def get_app_latest_release_version():
try:
response = requests.get(
f"https://api.github.com/repos/open-webui/open-webui/releases/latest"
)
response.raise_for_status()
latest_version = response.json()["tag_name"]
return {"current": VERSION, "latest": latest_version[1:]}
except Exception as e:
async with aiohttp.ClientSession() as session:
async with session.get(
"https://api.github.com/repos/open-webui/open-webui/releases/latest"
) as response:
response.raise_for_status()
data = await response.json()
latest_version = data["tag_name"]
return {"current": VERSION, "latest": latest_version[1:]}
except aiohttp.ClientError as e:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=ERROR_MESSAGES.RATE_LIMIT_EXCEEDED,
......@@ -297,9 +303,8 @@ async def get_manifest_json():
}
app.mount("/static", StaticFiles(directory="static"), name="static")
app.mount("/cache", StaticFiles(directory="data/cache"), name="cache")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
app.mount("/cache", StaticFiles(directory=CACHE_DIR), name="cache")
app.mount(
"/",
......
......@@ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key
PORT="${PORT:-8080}"
if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then
echo No WEBUI_SECRET_KEY provided
echo "No WEBUI_SECRET_KEY provided"
if ! [ -e "$KEY_FILE" ]; then
echo Generating WEBUI_SECRET_KEY
echo "Generating WEBUI_SECRET_KEY"
# Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one.
echo $(head -c 12 /dev/random | base64) > $KEY_FILE
echo $(head -c 12 /dev/random | base64) > "$KEY_FILE"
fi
echo Loading WEBUI_SECRET_KEY from $KEY_FILE
WEBUI_SECRET_KEY=`cat $KEY_FILE`
echo "Loading WEBUI_SECRET_KEY from $KEY_FILE"
WEBUI_SECRET_KEY=$(cat "$KEY_FILE")
fi
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
\ No newline at end of file
if [ "$USE_OLLAMA_DOCKER" = "true" ]; then
echo "USE_OLLAMA is set to true, starting ollama serve."
ollama serve &
fi
if [ "$USE_CUDA_DOCKER" = "true" ]; then
echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries."
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib"
fi
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
services:
ollama:
devices:
- /dev/kfd:/dev/kfd
- /dev/dri:/dev/dri
image: ollama/ollama:${OLLAMA_DOCKER_TAG-rocm}
environment:
- 'HSA_OVERRIDE_GFX_VERSION=${HSA_OVERRIDE_GFX_VERSION-11.0.0}'
\ No newline at end of file
......@@ -8,7 +8,7 @@ services:
pull_policy: always
tty: true
restart: unless-stopped
image: ollama/ollama:latest
image: ollama/ollama:${OLLAMA_DOCKER_TAG-latest}
open-webui:
build:
......@@ -16,7 +16,7 @@ services:
args:
OLLAMA_BASE_URL: '/ollama'
dockerfile: Dockerfile
image: ghcr.io/open-webui/open-webui:main
image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
container_name: open-webui
volumes:
- open-webui:/app/backend/data
......
......@@ -7,8 +7,12 @@ ollama
{{- end -}}
{{- define "ollama.url" -}}
{{- if .Values.ollama.externalHost }}
{{- printf .Values.ollama.externalHost }}
{{- else }}
{{- printf "http://%s.%s.svc.cluster.local:%d/" (include "ollama.name" .) (.Release.Namespace) (.Values.ollama.service.port | int) }}
{{- end }}
{{- end }}
{{- define "chart.name" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment