Commit 60433856 authored by Jun Siang Cheah

Merge remote-tracking branch 'upstream/dev' into feat/backend-web-search

parents 224a578e 98194d97
@@ -11,7 +11,7 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
       - name: Check for changes in package.json
         run: |
@@ -36,7 +36,7 @@ jobs:
          echo "::set-output name=content::$CHANGELOG_ESCAPED"
       - name: Create GitHub release
-        uses: actions/github-script@v5
+        uses: actions/github-script@v7
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |
@@ -51,7 +51,7 @@ jobs:
            console.log(`Created release ${release.data.html_url}`)
       - name: Upload package to GitHub release
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: package
           path: .
...
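Note: while the action versions get bumped here, the changelog step above still writes its output with the deprecated `::set-output` workflow command. A minimal sketch of the modern equivalent (same variable names; multi-line values would additionally need the heredoc delimiter form):

    # Modern replacement for the deprecated ::set-output command (sketch):
    echo "content=$CHANGELOG_ESCAPED" >> "$GITHUB_OUTPUT"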
name: Deploy to HuggingFace Spaces

on:
  push:
    branches:
      - dev
      - main
  workflow_dispatch:

jobs:
  check-secret:
    runs-on: ubuntu-latest
    outputs:
      token-set: ${{ steps.check-key.outputs.defined }}
    steps:
      - id: check-key
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        if: "${{ env.HF_TOKEN != '' }}"
        run: echo "defined=true" >> $GITHUB_OUTPUT

  deploy:
    runs-on: ubuntu-latest
    needs: [check-secret]
    if: needs.check-secret.outputs.token-set == 'true'
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Remove git history
        run: rm -rf .git
      - name: Prepend YAML front matter to README.md
        run: |
          echo "---" > temp_readme.md
          echo "title: Open WebUI" >> temp_readme.md
          echo "emoji: 🐳" >> temp_readme.md
          echo "colorFrom: purple" >> temp_readme.md
          echo "colorTo: gray" >> temp_readme.md
          echo "sdk: docker" >> temp_readme.md
          echo "app_port: 8080" >> temp_readme.md
          echo "---" >> temp_readme.md
          cat README.md >> temp_readme.md
          mv temp_readme.md README.md
      - name: Configure git
        run: |
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"
      - name: Set up Git and push to Space
        run: |
          git init --initial-branch=main
          git lfs track "*.ttf"
          rm demo.gif
          git add .
          git commit -m "GitHub deploy: ${{ github.sha }}"
          git push --force https://open-webui:${HF_TOKEN}@huggingface.co/spaces/open-webui/open-webui main
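Note: the ten `echo` appends in the front-matter step work fine; the same file could also be produced with a single quoted heredoc. A sketch, assuming the front-matter values stay static (not part of the workflow):

    # Build the Space front matter in one write
    cat > temp_readme.md <<'EOF'
    ---
    title: Open WebUI
    emoji: 🐳
    colorFrom: purple
    colorTo: gray
    sdk: docker
    app_port: 8080
    ---
    EOF
    cat README.md >> temp_readme.md
    mv temp_readme.md README.md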
@@ -63,6 +63,16 @@ jobs:
          flavor: |
            latest=${{ github.ref == 'refs/heads/main' }}
+      - name: Extract metadata for Docker cache
+        id: cache-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+          flavor: |
+            prefix=cache-${{ matrix.platform }}-
      - name: Build Docker image (latest)
        uses: docker/build-push-action@v5
        id: build
@@ -72,8 +82,8 @@ jobs:
          platforms: ${{ matrix.platform }}
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+          cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }}
+          cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max
      - name: Export digest
        run: |
@@ -123,7 +133,7 @@ jobs:
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Extract metadata for Docker images (default latest tag)
+      - name: Extract metadata for Docker images (cuda tag)
        id: meta
        uses: docker/metadata-action@v5
        with:
@@ -139,6 +149,16 @@ jobs:
            latest=${{ github.ref == 'refs/heads/main' }}
            suffix=-cuda,onlatest=true
+      - name: Extract metadata for Docker cache
+        id: cache-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+          flavor: |
+            prefix=cache-cuda-${{ matrix.platform }}-
      - name: Build Docker image (cuda)
        uses: docker/build-push-action@v5
        id: build
@@ -148,8 +168,8 @@ jobs:
          platforms: ${{ matrix.platform }}
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+          cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }}
+          cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max
          build-args: USE_CUDA=true
      - name: Export digest
@@ -216,6 +236,16 @@ jobs:
            latest=${{ github.ref == 'refs/heads/main' }}
            suffix=-ollama,onlatest=true
+      - name: Extract metadata for Docker cache
+        id: cache-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+          flavor: |
+            prefix=cache-ollama-${{ matrix.platform }}-
      - name: Build Docker image (ollama)
        uses: docker/build-push-action@v5
        id: build
@@ -225,8 +255,8 @@ jobs:
          platforms: ${{ matrix.platform }}
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+          cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }}
+          cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max
          build-args: USE_OLLAMA=true
      - name: Export digest
...
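Note (an inference, not stated in the diff): this change moves BuildKit layer caching off the GitHub Actions cache backend (`type=gha`, which has per-repo size limits) onto cache images pushed to the registry, tagged per branch, flavor, and platform. A hedged sketch of inspecting such a cache image — OWNER/REPO and the tag shape are assumptions, and since Docker tags cannot contain `/`, the `matrix.platform` string would need flattening in practice:

    # Assumed tag shape: <prefix><branch>, e.g. cache-linux-amd64-dev
    docker buildx imagetools inspect ghcr.io/OWNER/REPO:cache-linux-amd64-dev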
@@ -23,7 +23,7 @@ jobs:
      - uses: actions/checkout@v4
      - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
...
@@ -19,7 +19,7 @@ jobs:
        uses: actions/checkout@v4
      - name: Setup Node.js
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
        with:
          node-version: '20' # Or specify any other version you want to use
...
@@ -20,7 +20,11 @@ jobs:
      - name: Build and run Compose Stack
        run: |
-          docker compose --file docker-compose.yaml --file docker-compose.api.yaml up --detach --build
+          docker compose \
+            --file docker-compose.yaml \
+            --file docker-compose.api.yaml \
+            --file docker-compose.a1111-test.yaml \
+            up --detach --build
      - name: Wait for Ollama to be up
        timeout-minutes: 5
@@ -95,7 +99,7 @@ jobs:
        uses: actions/checkout@v4
      - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
...
name: Release to PyPI

on:
  push:
    branches:
      - main # or whatever branch you want to use
      - dev

jobs:
  release:
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/open-webui
    permissions:
      id-token: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 18
      - uses: actions/setup-python@v5
        with:
          python-version: 3.11
      - name: Build
        run: |
          python -m pip install --upgrade pip
          pip install build
          python -m build .
      - name: Publish package distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
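Note: since no `password:` input is supplied, the publish step appears to rely on OIDC trusted publishing — `permissions: id-token: write` plus the `pypi` environment is exactly what `pypa/gh-action-pypi-publish` needs for token-less uploads. The build step can be reproduced locally as a sanity check:

    # Local equivalent of the Build step (sketch; artifacts land in ./dist/):
    python -m pip install --upgrade pip
    pip install build
    python -m build .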
@@ -132,7 +132,8 @@ RUN pip3 install uv && \
    uv pip install --system -r requirements.txt --no-cache-dir && \
    python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
    python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
-    fi
+    fi; \
+    chown -R $UID:$GID /app/backend/data/
...
@@ -43,6 +43,7 @@ from utils.utils import (
from config import (
    SRC_LOG_LEVELS,
    OLLAMA_BASE_URLS,
+    ENABLE_OLLAMA_API,
    ENABLE_MODEL_FILTER,
    MODEL_FILTER_LIST,
    UPLOAD_DIR,
@@ -67,6 +68,8 @@ app.state.config = AppConfig()
app.state.config.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER
app.state.config.MODEL_FILTER_LIST = MODEL_FILTER_LIST

+app.state.config.ENABLE_OLLAMA_API = ENABLE_OLLAMA_API
+
app.state.config.OLLAMA_BASE_URLS = OLLAMA_BASE_URLS
app.state.MODELS = {}
@@ -96,6 +99,21 @@ async def get_status():
    return {"status": True}

+@app.get("/config")
+async def get_config(user=Depends(get_admin_user)):
+    return {"ENABLE_OLLAMA_API": app.state.config.ENABLE_OLLAMA_API}
+
+
+class OllamaConfigForm(BaseModel):
+    enable_ollama_api: Optional[bool] = None
+
+
+@app.post("/config/update")
+async def update_config(form_data: OllamaConfigForm, user=Depends(get_admin_user)):
+    app.state.config.ENABLE_OLLAMA_API = form_data.enable_ollama_api
+    return {"ENABLE_OLLAMA_API": app.state.config.ENABLE_OLLAMA_API}
+
+
@app.get("/urls")
async def get_ollama_api_urls(user=Depends(get_admin_user)):
    return {"OLLAMA_BASE_URLS": app.state.config.OLLAMA_BASE_URLS}
@@ -156,14 +174,23 @@ def merge_models_lists(model_lists):
async def get_all_models():
    log.info("get_all_models()")

-    tasks = [fetch_url(f"{url}/api/tags") for url in app.state.config.OLLAMA_BASE_URLS]
-    responses = await asyncio.gather(*tasks)
-
-    models = {
-        "models": merge_models_lists(
-            map(lambda response: response["models"] if response else None, responses)
-        )
-    }
+    if app.state.config.ENABLE_OLLAMA_API:
+        tasks = [
+            fetch_url(f"{url}/api/tags") for url in app.state.config.OLLAMA_BASE_URLS
+        ]
+        responses = await asyncio.gather(*tasks)
+
+        models = {
+            "models": merge_models_lists(
+                map(
+                    lambda response: response["models"] if response else None, responses
+                )
+            )
+        }
+    else:
+        models = {"models": []}

    app.state.MODELS = {model["model"]: model for model in models["models"]}
...
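A hedged usage sketch for the two new admin endpoints. The paths assume this sub-app is mounted under `/ollama`, as in the main application, and `$ADMIN_TOKEN` is an admin user's API token:

    # Read the current flag:
    curl -s -H "Authorization: Bearer $ADMIN_TOKEN" \
      http://localhost:8080/ollama/config

    # Toggle it off; get_all_models() then returns an empty model list:
    curl -s -X POST -H "Authorization: Bearer $ADMIN_TOKEN" \
      -H "Content-Type: application/json" \
      -d '{"enable_ollama_api": false}' \
      http://localhost:8080/ollama/config/update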
@@ -306,6 +306,7 @@ async def get_models(url_idx: Optional[int] = None, user=Depends(get_current_use
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
    idx = 0
+    pipeline = False

    body = await request.body()
    # TODO: Remove below after gpt-4-vision fix from Open AI
@@ -314,7 +315,15 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
        body = body.decode("utf-8")
        body = json.loads(body)

-        idx = app.state.MODELS[body.get("model")]["urlIdx"]
+        model = app.state.MODELS[body.get("model")]
+        idx = model["urlIdx"]
+
+        if "pipeline" in model:
+            pipeline = model.get("pipeline")
+
+        if pipeline:
+            body["user"] = {"name": user.name, "id": user.id}

        # Check if the model is "gpt-4-vision-preview" and set "max_tokens" to 4000
        # This is a workaround until OpenAI fixes the issue with this model
...
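Illustrative only: for a model entry flagged with `pipeline`, the proxy now injects the requesting user into the outbound body, so a request like the sketch below would be forwarded with an added `"user": {"name": ..., "id": ...}` field (model name and mount path are assumptions):

    curl -X POST http://localhost:8080/openai/chat/completions \
      -H "Authorization: Bearer $TOKEN" \
      -H "Content-Type: application/json" \
      -d '{"model": "my-pipeline", "messages": [{"role": "user", "content": "hi"}]}'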
from peewee import *
from peewee_migrate import Router
from playhouse.db_url import connect
-from config import SRC_LOG_LEVELS, DATA_DIR, DATABASE_URL
+from config import SRC_LOG_LEVELS, DATA_DIR, DATABASE_URL, BACKEND_DIR
import os
import logging
@@ -18,6 +18,8 @@ else:
DB = connect(DATABASE_URL)
log.info(f"Connected to a {DB.__class__.__name__} database.")

-router = Router(DB, migrate_dir="apps/web/internal/migrations", logger=log)
+router = Router(
+    DB, migrate_dir=BACKEND_DIR / "apps" / "web" / "internal" / "migrations", logger=log
+)
router.run()
DB.connect(reuse_if_open=True)
import os
import sys
import logging
+import importlib.metadata
+import pkgutil
import chromadb
from chromadb import Settings
from base64 import b64encode
@@ -22,10 +24,13 @@ from constants import ERROR_MESSAGES
# Load .env file
####################################

+BACKEND_DIR = Path(__file__).parent  # the path containing this file
+BASE_DIR = BACKEND_DIR.parent  # the path containing the backend/
+
try:
    from dotenv import load_dotenv, find_dotenv

-    load_dotenv(find_dotenv("../.env"))
+    load_dotenv(find_dotenv(str(BASE_DIR / ".env")))
except ImportError:
    print("dotenv not installed, skipping...")
@@ -87,10 +92,12 @@ WEBUI_FAVICON_URL = "https://openwebui.com/favicon.png"
ENV = os.environ.get("ENV", "dev")

try:
-    with open(f"../package.json", "r") as f:
-        PACKAGE_DATA = json.load(f)
+    PACKAGE_DATA = json.loads((BASE_DIR / "package.json").read_text())
except:
-    PACKAGE_DATA = {"version": "0.0.0"}
+    try:
+        PACKAGE_DATA = {"version": importlib.metadata.version("open-webui")}
+    except importlib.metadata.PackageNotFoundError:
+        PACKAGE_DATA = {"version": "0.0.0"}

VERSION = PACKAGE_DATA["version"]
@@ -115,10 +122,10 @@ def parse_section(section):
try:
-    with open("../CHANGELOG.md", "r") as file:
-        changelog_content = file.read()
+    changelog_content = (BASE_DIR / "CHANGELOG.md").read_text()
except:
-    changelog_content = ""
+    changelog_content = (pkgutil.get_data("open_webui", "CHANGELOG.md") or b"").decode()

# Convert markdown content to HTML
html_content = markdown.markdown(changelog_content)
@@ -164,12 +171,11 @@ WEBUI_VERSION = os.environ.get("WEBUI_VERSION", "v1.0.0-alpha.100")
# DATA/FRONTEND BUILD DIR
####################################

-DATA_DIR = str(Path(os.getenv("DATA_DIR", "./data")).resolve())
-FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build")))
+DATA_DIR = Path(os.getenv("DATA_DIR", BACKEND_DIR / "data")).resolve()
+FRONTEND_BUILD_DIR = Path(os.getenv("FRONTEND_BUILD_DIR", BASE_DIR / "build")).resolve()

try:
-    with open(f"{DATA_DIR}/config.json", "r") as f:
-        CONFIG_DATA = json.load(f)
+    CONFIG_DATA = json.loads((DATA_DIR / "config.json").read_text())
except:
    CONFIG_DATA = {}
@@ -279,11 +285,11 @@ JWT_EXPIRES_IN = PersistentConfig(
# Static DIR
####################################

-STATIC_DIR = str(Path(os.getenv("STATIC_DIR", "./static")).resolve())
+STATIC_DIR = Path(os.getenv("STATIC_DIR", BACKEND_DIR / "static")).resolve()

-frontend_favicon = f"{FRONTEND_BUILD_DIR}/favicon.png"
-if os.path.exists(frontend_favicon):
-    shutil.copyfile(frontend_favicon, f"{STATIC_DIR}/favicon.png")
+frontend_favicon = FRONTEND_BUILD_DIR / "favicon.png"
+
+if frontend_favicon.exists():
+    shutil.copyfile(frontend_favicon, STATIC_DIR / "favicon.png")
else:
    logging.warning(f"Frontend favicon not found at {frontend_favicon}")
@@ -378,6 +384,13 @@ if not os.path.exists(LITELLM_CONFIG_PATH):
# OLLAMA_BASE_URL
####################################

+ENABLE_OLLAMA_API = PersistentConfig(
+    "ENABLE_OLLAMA_API",
+    "ollama.enable",
+    os.environ.get("ENABLE_OLLAMA_API", "True").lower() == "true",
+)
+
OLLAMA_API_BASE_URL = os.environ.get(
    "OLLAMA_API_BASE_URL", "http://localhost:11434/api"
)
...
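The new `ENABLE_OLLAMA_API` flag defaults to true and is seeded from the environment. A hedged sketch of disabling it at container start (image tag as published by this repo's CI):

    docker run -d -p 3000:8080 -e ENABLE_OLLAMA_API=false \
      --name open-webui ghcr.io/open-webui/open-webui:main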
@@ -8,6 +8,7 @@ import sys
import logging
import aiohttp
import requests
+import mimetypes

from fastapi import FastAPI, Request, Depends, status
from fastapi.staticfiles import StaticFiles
@@ -410,6 +411,7 @@ app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
app.mount("/cache", StaticFiles(directory=CACHE_DIR), name="cache")

if os.path.exists(FRONTEND_BUILD_DIR):
+    mimetypes.add_type("text/javascript", ".js")
    app.mount(
        "/",
        SPAStaticFiles(directory=FRONTEND_BUILD_DIR, html=True),
...
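Background on the `mimetypes.add_type` call (my reading, not stated in the diff): Python's `mimetypes` consults OS-level tables, and on some hosts — notably via the Windows registry — `.js` can resolve to `text/plain`, which browsers reject for ES modules. Pinning it to `text/javascript` makes the SPA mount deterministic. To see what a given host would guess without the override:

    python -c 'import mimetypes; print(mimetypes.guess_type("app.js"))'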
import base64
import os
import random
from pathlib import Path

import typer
import uvicorn

app = typer.Typer()

KEY_FILE = Path.cwd() / ".webui_secret_key"

if (frontend_build_dir := Path(__file__).parent / "frontend").exists():
    os.environ["FRONTEND_BUILD_DIR"] = str(frontend_build_dir)


@app.command()
def serve(
    host: str = "0.0.0.0",
    port: int = 8080,
):
    if os.getenv("WEBUI_SECRET_KEY") is None:
        typer.echo(
            "Loading WEBUI_SECRET_KEY from file, not provided as an environment variable."
        )

        if not KEY_FILE.exists():
            typer.echo(f"Generating a new secret key and saving it to {KEY_FILE}")
            KEY_FILE.write_bytes(base64.b64encode(random.randbytes(12)))

        typer.echo(f"Loading WEBUI_SECRET_KEY from {KEY_FILE}")
        os.environ["WEBUI_SECRET_KEY"] = KEY_FILE.read_text()

    if os.getenv("USE_CUDA_DOCKER", "false") == "true":
        typer.echo(
            "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries."
        )
        LD_LIBRARY_PATH = os.getenv("LD_LIBRARY_PATH", "").split(":")
        os.environ["LD_LIBRARY_PATH"] = ":".join(
            LD_LIBRARY_PATH
            + [
                "/usr/local/lib/python3.11/site-packages/torch/lib",
                "/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib",
            ]
        )

    import main  # we need to set environment variables before importing main

    uvicorn.run(main.app, host=host, port=port, forwarded_allow_ips="*")


@app.command()
def dev(
    host: str = "0.0.0.0",
    port: int = 8080,
    reload: bool = True,
):
    uvicorn.run(
        "main:app", host=host, port=port, reload=reload, forwarded_allow_ips="*"
    )


if __name__ == "__main__":
    app()
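A hedged usage sketch, assuming the packaged console script is named `open-webui` (as the PyPI workflow above suggests):

    pip install open-webui
    open-webui serve --host 0.0.0.0 --port 8080
    # or, with auto-reload during development:
    open-webui dev --port 8080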
-fastapi==0.109.2
+fastapi==0.111.0
uvicorn[standard]==0.22.0
pydantic==2.7.1
python-multipart==0.0.9
Flask==3.0.3
-Flask-Cors==4.0.0
+Flask-Cors==4.0.1
python-socketio==5.11.2
python-jose==3.3.0
passlib[bcrypt]==1.7.4
-requests==2.31.0
+requests==2.32.2
aiohttp==3.9.5
-peewee==3.17.3
+peewee==3.17.5
peewee-migrate==1.12.2
psycopg2-binary==2.9.9
-PyMySQL==1.1.0
+PyMySQL==1.1.1
-bcrypt==4.1.2
+bcrypt==4.1.3
-litellm[proxy]==1.35.28
+litellm[proxy]==1.37.20
-boto3==1.34.95
+boto3==1.34.110
argon2-cffi==23.1.0
APScheduler==3.10.4
-google-generativeai==0.5.2
+google-generativeai==0.5.4
-langchain==0.1.16
+langchain==0.2.0
-langchain-community==0.0.34
+langchain-community==0.2.0
-langchain-chroma==0.1.0
+langchain-chroma==0.1.1
fake-useragent==1.5.1
-chromadb==0.4.24
+chromadb==0.5.0
sentence-transformers==2.7.0
pypdf==4.2.0
docx2txt==0.8
python-pptx==0.6.23
-unstructured==0.11.8
+unstructured==0.14.0
Markdown==3.6
pypandoc==1.13
pandas==2.2.2
@@ -46,16 +46,16 @@ xlrd==2.0.1
validators==0.28.1
opencv-python-headless==4.9.0.80
-rapidocr-onnxruntime==1.2.3
+rapidocr-onnxruntime==1.3.22
-fpdf2==2.7.8
+fpdf2==2.7.9
rank-bm25==0.2.2
-faster-whisper==1.0.1
+faster-whisper==1.0.2
PyJWT[crypto]==2.8.0
black==24.4.2
-langfuse==2.27.3
+langfuse==2.33.0
youtube-transcript-api==0.6.2
-pytube
+pytube==15.0.0
\ No newline at end of file
litellm_settings:
  drop_params: true
model_list:
  - model_name: 'HuggingFace: Mistral: Mistral 7B Instruct v0.1'
    litellm_params:
      model: huggingface/mistralai/Mistral-7B-Instruct-v0.1
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Mistral: Mistral 7B Instruct v0.2'
    litellm_params:
      model: huggingface/mistralai/Mistral-7B-Instruct-v0.2
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Meta: Llama 3 8B Instruct'
    litellm_params:
      model: huggingface/meta-llama/Meta-Llama-3-8B-Instruct
      api_key: os.environ/HF_TOKEN
      max_tokens: 2047
  - model_name: 'HuggingFace: Mistral: Mixtral 8x7B Instruct v0.1'
    litellm_params:
      model: huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1
      api_key: os.environ/HF_TOKEN
      max_tokens: 8192
  - model_name: 'HuggingFace: Microsoft: Phi-3 Mini-4K-Instruct'
    litellm_params:
      model: huggingface/microsoft/Phi-3-mini-4k-instruct
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Google: Gemma 7B 1.1'
    litellm_params:
      model: huggingface/google/gemma-1.1-7b-it
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Yi-1.5 34B Chat'
    litellm_params:
      model: huggingface/01-ai/Yi-1.5-34B-Chat
      api_key: os.environ/HF_TOKEN
      max_tokens: 1024
  - model_name: 'HuggingFace: Nous Research: Nous Hermes 2 Mixtral 8x7B DPO'
    litellm_params:
      model: huggingface/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
      api_key: os.environ/HF_TOKEN
      max_tokens: 2048
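One detail worth spelling out: `os.environ/HF_TOKEN` is LiteLLM's syntax for resolving a secret from the environment at request time, so this file carries no credentials itself; the only runtime requirement is that the variable exists where the proxy runs, e.g.:

    # Illustrative check before starting the proxy:
    test -n "$HF_TOKEN" || echo "HF_TOKEN is not set"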
@@ -30,4 +30,34 @@ if [ "$USE_CUDA_DOCKER" = "true" ]; then
    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib"
fi

+# Check if SPACE_ID is set, if so, configure for space
+if [ -n "$SPACE_ID" ]; then
+  echo "Configuring for HuggingFace Space deployment"
+
+  # Copy litellm_config.yaml with specified ownership
+  echo "Copying litellm_config.yaml to the desired location with specified ownership..."
+  cp -f ./space/litellm_config.yaml ./data/litellm/config.yaml
+
+  if [ -n "$ADMIN_USER_EMAIL" ] && [ -n "$ADMIN_USER_PASSWORD" ]; then
+    echo "Admin user configured, creating"
+    WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" uvicorn main:app --host "$HOST" --port "$PORT" --forwarded-allow-ips '*' &
+    webui_pid=$!
+    echo "Waiting for webui to start..."
+    while ! curl -s http://localhost:8080/health > /dev/null; do
+      sleep 1
+    done
+    echo "Creating admin user..."
+    curl \
+      -X POST "http://localhost:8080/api/v1/auths/signup" \
+      -H "accept: application/json" \
+      -H "Content-Type: application/json" \
+      -d "{ \"email\": \"${ADMIN_USER_EMAIL}\", \"password\": \"${ADMIN_USER_PASSWORD}\", \"name\": \"Admin\" }"
+    echo "Shutting down webui..."
+    kill $webui_pid
+  fi
+
+  export WEBUI_URL=${SPACE_HOST}
+fi
+
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host "$HOST" --port "$PORT" --forwarded-allow-ips '*'
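A hedged sketch for exercising the new Space bootstrap branch locally — these are exactly the variables the script reads, with placeholder values:

    SPACE_ID=demo SPACE_HOST=https://example.hf.space \
    ADMIN_USER_EMAIL=admin@example.com ADMIN_USER_PASSWORD=changeme \
    bash start.sh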
@@ -74,5 +74,28 @@ describe('Settings', () => {
            expect(spy).to.be.callCount(2);
        });
    });

+    it('user can generate image', () => {
+        // Click on the model selector
+        cy.get('button[aria-label="Select a model"]').click();
+        // Select the first model
+        cy.get('button[aria-label="model-item"]').first().click();
+        // Type a message
+        cy.get('#chat-textarea').type('Hi, what can you do? A single sentence only please.', {
+            force: true
+        });
+        // Send the message
+        cy.get('button[type="submit"]').click();
+        // User's message should be visible
+        cy.get('.chat-user').should('exist');
+        // Wait for the response
+        cy.get('.chat-assistant', { timeout: 120_000 }) // .chat-assistant is created after the first token is received
+            .find('div[aria-label="Generation Info"]', { timeout: 120_000 }) // Generation Info is created after the stop token is received
+            .should('exist');
+        // Click on the generate image button
+        cy.get('[aria-label="Generate Image"]').click();
+        // Wait for image to be visible
+        cy.get('img[data-cy="image"]', { timeout: 60_000 }).should('be.visible');
+    });
    });
});
# This is an overlay that spins up stable-diffusion-webui for integration testing
# This is not designed to be used in production
services:
  stable-diffusion-webui:
    # Not built for ARM64
    platform: linux/amd64
    image: ghcr.io/neggles/sd-webui-docker:latest
    restart: unless-stopped
    environment:
      CLI_ARGS: "--api --use-cpu all --precision full --no-half --skip-torch-cuda-test --ckpt /empty.pt --do-not-download-clip --disable-nan-check --disable-opt-split-attention"
      PYTHONUNBUFFERED: "1"
      TERM: "vt100"
      SD_WEBUI_VARIANT: "default"
    # Hack to get container working on Apple Silicon
    # Rosetta creates a conflict ${HOME}/.cache folder
    entrypoint: /bin/bash
    command:
      - -c
      - |
        export HOME=/root-home
        rm -rf $${HOME}/.cache
        /docker/entrypoint.sh python -u webui.py --listen --port $${WEBUI_PORT} --skip-version-check $${CLI_ARGS}
    volumes:
      - ./test/test_files/image_gen/sd-empty.pt:/empty.pt

  open-webui:
    environment:
      ENABLE_IMAGE_GENERATION: "true"
      AUTOMATIC1111_BASE_URL: http://stable-diffusion-webui:7860
      IMAGE_SIZE: "64x64"
      IMAGE_STEPS: "3"
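Local usage mirrors the CI invocation shown earlier in this commit; the overlay only makes sense layered over the base and API compose files:

    docker compose \
      --file docker-compose.yaml \
      --file docker-compose.api.yaml \
      --file docker-compose.a1111-test.yaml \
      up --detach --build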