name: Create and publish Docker images with specific build args
# Configures this workflow to run on manual dispatch, and every time a change is pushed to the `main` branch or a `v*` tag.
on:
  workflow_dispatch:
  push:
    branches:
      - main
...
...
@@ -10,30 +9,39 @@ on:
    tags:
      - v*
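    # e.g. pushing a version tag triggers a release build in addition to pushes to `main`:
    #   git tag v1.0.0 && git push origin v1.0.0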
# Defines three custom environment variables for the workflow. These are used for the Container registry domain, a name for the Docker image that this workflow builds, and the image's fully qualified name.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
  FULL_IMAGE_NAME: ghcr.io/${{ github.repository }}
# The build job below is configured to run on the latest available version of Ubuntu.
jobs:
-  build-and-push-image:
+  build-main-image:
    runs-on: ubuntu-latest
    # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
    permissions:
      contents: read
      packages: write
    # Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
    steps:
      - name: Log in to the Container registry
        uses: docker/login-action@v3
        with:
...
...
@@ -41,12 +49,11 @@ jobs:
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Extract metadata for Docker images
+      - name: Extract metadata for Docker images (default latest tag)
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # This configuration dynamically generates tags based on the branch, tag, and commit, plus a custom suffix for the lite version.
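          # A minimal sketch of such a configuration (assumed, not verbatim from the
          # upstream workflow): tag rules use the standard docker/metadata-action
          # syntax, and the lite suffix is applied via the `flavor` input.
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=sha
          flavor: |
            suffix=-lite,onlatest=true
      # The build step consumes the build args this workflow exists for; the
      # docker/build-push-action inputs are standard, the model value is illustrative.
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            USE_EMBEDDING_MODEL=intfloat/multilingual-e5-base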
# for better performance and multilanguage support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you switch the default embedding model (all-MiniLM-L6-v2) in either direction, you can no longer use RAG Chat with documents previously loaded in the WebUI; you need to re-embed them.
ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2

######## WebUI frontend ########
-FROM node:alpine as build
+FROM --platform=$BUILDPLATFORM node:21-alpine3.19 as build
WORKDIR /app

# wget embedding model weight from alpine (does not exist from slim-buster)
RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
    tar -xzf - -C /app
COPY package.json package-lock.json ./
RUN npm ci
COPY . .
RUN npm run build
######## WebUI backend ########
FROM python:3.11-slim-bookworm as base

-ENV ENV=prod
-ENV PORT ""
-ENV OLLAMA_BASE_URL "/ollama"
-ENV OPENAI_API_BASE_URL ""
-ENV OPENAI_API_KEY ""
-ENV WEBUI_SECRET_KEY ""
-ENV WEBUI_AUTH_TRUSTED_EMAIL_HEADER ""
-ENV SCARF_NO_ANALYTICS true
-ENV DO_NOT_TRACK true

+# Use args
+ARG USE_CUDA
+ARG USE_OLLAMA
+ARG USE_CUDA_VER
+ARG USE_EMBEDDING_MODEL

+## Basis ##
+ENV ENV=prod \
+    PORT=8080 \
+    # pass build args to the build
+    USE_OLLAMA_DOCKER=${USE_OLLAMA} \
+    USE_CUDA_DOCKER=${USE_CUDA} \
+    USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
+    USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL}

+## Basis URL Config ##
+ENV OLLAMA_BASE_URL="/ollama" \
+    OPENAI_API_BASE_URL=""

+## API Key and Security Config ##
+ENV OPENAI_API_KEY="" \
+    WEBUI_SECRET_KEY="" \
+    SCARF_NO_ANALYTICS=true \
+    DO_NOT_TRACK=true
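# With the build args plumbed into the *_DOCKER variables above, an image variant
# is chosen at build time; a sketch (tag and values illustrative):
#   docker build \
#     --build-arg USE_CUDA=true --build-arg USE_CUDA_VER=cu121 \
#     --build-arg USE_EMBEDDING_MODEL=intfloat/multilingual-e5-base \
#     -t open-webui:custom .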
# Use locally bundled version of the LiteLLM cost map json

# for better performance and multilanguage support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
# IMPORTANT: If you switch the default embedding model (all-MiniLM-L6-v2) in either direction, you can no longer use RAG Chat with documents previously loaded in the WebUI; you need to re-embed them.
-ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2"
+ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER"
# device type for whisper tts and embedding models - "cpu" (default), "cuda" (Nvidia GPU and CUDA required) or "mps" (Apple silicon) - choosing this right can lead to better performance
> When using Docker to install Open WebUI, make sure to include the `-v open-webui:/app/backend/data` flag in your Docker command. This step is crucial, as it ensures your database is properly mounted and prevents any loss of data.
- **If Ollama is on your computer**, use this command:
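
  ```bash
  # standard Open WebUI quickstart command; adjust the host port (3000) as needed
  docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
  ```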
> [!TIP]
> If you wish to utilize Open WebUI with Ollama included or CUDA acceleration, we recommend utilizing our official images tagged with either `:cuda` or `:ollama`. To enable CUDA, you must install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your Linux/WSL system.
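
For example, to run the CUDA-tagged image on a host with the toolkit installed (this mirrors the standard run command above; adjust names and ports to taste):

```bash
# --gpus all exposes the host GPU to the container; requires the Nvidia CUDA container toolkit
docker run -d -p 3000:8080 --gpus all --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:cuda
```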
@@ -391,10 +402,22 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
CHROMA_DATA_PATH=f"{DATA_DIR}/vector_db"
# this uses the model defined in the Dockerfile ENV variable. If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2)
# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance
RAG_EMBEDDING_MODEL_DEVICE_TYPE=os.environ.get(
"RAG_EMBEDDING_MODEL_DEVICE_TYPE","cpu"
log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"),
# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance
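
Because the config reads these values with `os.environ.get`, the build-time defaults baked in by the Dockerfile can also be overridden per container at runtime, with no rebuild. A minimal sketch (model value and container name are illustrative):

```bash
# -e takes precedence over the Dockerfile ENV default read by config.py
docker run -d -p 3000:8080 \
  -e RAG_EMBEDDING_MODEL=intfloat/multilingual-e5-base \
  -v open-webui:/app/backend/data --name open-webui ghcr.io/open-webui/open-webui:main
```

Note that changing the embedding model this way still requires re-embedding previously loaded documents, per the warning in the Dockerfile.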