Merge remote-tracking branch 'upstream/dev' into feat/include-git-hash-everywhere

aba63088 · Jun Siang Cheah · 4fdb26fd · 7b81271b · aba63088 · aba63088
Commit aba63088 authored May 26, 2024 by Jun Siang Cheah
20 changed files
--- a/.env.example
+++ b/.env.example
@@ -11,7 +11,3 @@ OPENAI_API_KEY=''
 SCARF_NO_ANALYTICS=true
 DO_NOT_TRACK=true
 ANONYMIZED_TELEMETRY=false
+\ No newline at end of file
-
-# Use locally bundled version of the LiteLLM cost map json
-# to avoid repetitive startup connections
-LITELLM_LOCAL_MODEL_COST_MAP="True"
\ No newline at end of file
--- a/Dockerfile
+++ b/Dockerfile
@@ -63,11 +63,6 @@ ENV OPENAI_API_KEY="" \
    DO_NOT_TRACK=true \
    ANONYMIZED_TELEMETRY=false

-# Use locally bundled version of the LiteLLM cost map json
-# to avoid repetitive startup connections
-ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
-
-
 #### Other models #########################################################
 ## whisper TTS model settings ##
 ENV WHISPER_MODEL="base" \

--- a/backend/apps/litellm/main.py
+++ b/backend/apps/litellm/main.py
-import sys
-from contextlib import asynccontextmanager
-
-from fastapi import FastAPI, Depends, HTTPException
-from fastapi.routing import APIRoute
-from fastapi.middleware.cors import CORSMiddleware
-
-import logging
-from fastapi import FastAPI, Request, Depends, status, Response
-from fastapi.responses import JSONResponse
-
-from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
-from starlette.responses import StreamingResponse
-import json
-import time
-import requests
-
-from pydantic import BaseModel, ConfigDict
-from typing import Optional, List
-
-from utils.utils import get_verified_user, get_current_user, get_admin_user
-from config import SRC_LOG_LEVELS, ENV
-from constants import MESSAGES
-
-import os
-
-log = logging.getLogger(__name__)
-log.setLevel(SRC_LOG_LEVELS["LITELLM"])
-
-
-from config import (
-    ENABLE_LITELLM,
-    ENABLE_MODEL_FILTER,
-    MODEL_FILTER_LIST,
-    DATA_DIR,
-    LITELLM_PROXY_PORT,
-    LITELLM_PROXY_HOST,
-)
-
-import warnings
-
-warnings.simplefilter("ignore")
-
-from litellm.utils import get_llm_provider
-
-import asyncio
-import subprocess
-import yaml
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    log.info("startup_event")
-    # TODO: Check config.yaml file and create one
-    asyncio.create_task(start_litellm_background())
-    yield
-
-
-app = FastAPI(lifespan=lifespan)
-
-origins = ["*"]
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-LITELLM_CONFIG_DIR = f"{DATA_DIR}/litellm/config.yaml"
-
-with open(LITELLM_CONFIG_DIR, "r") as file:
-    litellm_config = yaml.safe_load(file)
-
-
-app.state.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER.value
-app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST.value
-
-
-app.state.ENABLE = ENABLE_LITELLM
-app.state.CONFIG = litellm_config
-
-# Global variable to store the subprocess reference
-background_process = None
-
-CONFLICT_ENV_VARS = [
-    # Uvicorn uses PORT, so LiteLLM might use it as well
-    "PORT",
-    # LiteLLM uses DATABASE_URL for Prisma connections
-    "DATABASE_URL",
-]
-
-
-async def run_background_process(command):
-    global background_process
-    log.info("run_background_process")
-
-    try:
-        # Log the command to be executed
-        log.info(f"Executing command: {command}")
-        # Filter environment variables known to conflict with litellm
-        env = {k: v for k, v in os.environ.items() if k not in CONFLICT_ENV_VARS}
-        # Execute the command and create a subprocess
-        process = await asyncio.create_subprocess_exec(
-            *command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
-        )
-        background_process = process
-        log.info("Subprocess started successfully.")
-
-        # Capture STDERR for debugging purposes
-        stderr_output = await process.stderr.read()
-        stderr_text = stderr_output.decode().strip()
-        if stderr_text:
-            log.info(f"Subprocess STDERR: {stderr_text}")
-
-        # log.info output line by line
-        async for line in process.stdout:
-            log.info(line.decode().strip())
-
-        # Wait for the process to finish
-        returncode = await process.wait()
-        log.info(f"Subprocess exited with return code {returncode}")
-    except Exception as e:
-        log.error(f"Failed to start subprocess: {e}")
-        raise  # Optionally re-raise the exception if you want it to propagate
-
-
-async def start_litellm_background():
-    log.info("start_litellm_background")
-    # Command to run in the background
-    command = [
-        "litellm",
-        "--port",
-        str(LITELLM_PROXY_PORT),
-        "--host",
-        LITELLM_PROXY_HOST,
-        "--telemetry",
-        "False",
-        "--config",
-        LITELLM_CONFIG_DIR,
-    ]
-
-    await run_background_process(command)
-
-
-async def shutdown_litellm_background():
-    log.info("shutdown_litellm_background")
-    global background_process
-    if background_process:
-        background_process.terminate()
-        await background_process.wait()  # Ensure the process has terminated
-        log.info("Subprocess terminated")
-        background_process = None
-
-
-@app.get("/")
-async def get_status():
-    return {"status": True}
-
-
-async def restart_litellm():
-    """
-    Endpoint to restart the litellm background service.
-    """
-    log.info("Requested restart of litellm service.")
-    try:
-        # Shut down the existing process if it is running
-        await shutdown_litellm_background()
-        log.info("litellm service shutdown complete.")
-
-        # Restart the background service
-
-        asyncio.create_task(start_litellm_background())
-        log.info("litellm service restart complete.")
-
-        return {
-            "status": "success",
-            "message": "litellm service restarted successfully.",
-        }
-    except Exception as e:
-        log.info(f"Error restarting litellm service: {e}")
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
-        )
-
-
-@app.get("/restart")
-async def restart_litellm_handler(user=Depends(get_admin_user)):
-    return await restart_litellm()
-
-
-@app.get("/config")
-async def get_config(user=Depends(get_admin_user)):
-    return app.state.CONFIG
-
-
-class LiteLLMConfigForm(BaseModel):
-    general_settings: Optional[dict] = None
-    litellm_settings: Optional[dict] = None
-    model_list: Optional[List[dict]] = None
-    router_settings: Optional[dict] = None
-
-    model_config = ConfigDict(protected_namespaces=())
-
-
-@app.post("/config/update")
-async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_user)):
-    app.state.CONFIG = form_data.model_dump(exclude_none=True)
-
-    with open(LITELLM_CONFIG_DIR, "w") as file:
-        yaml.dump(app.state.CONFIG, file)
-
-    await restart_litellm()
-    return app.state.CONFIG
-
-
-@app.get("/models")
-@app.get("/v1/models")
-async def get_models(user=Depends(get_current_user)):
-
-    if app.state.ENABLE:
-        while not background_process:
-            await asyncio.sleep(0.1)
-
-        url = f"http://localhost:{LITELLM_PROXY_PORT}/v1"
-        r = None
-        try:
-            r = requests.request(method="GET", url=f"{url}/models")
-            r.raise_for_status()
-
-            data = r.json()
-
-            if app.state.ENABLE_MODEL_FILTER:
-                if user and user.role == "user":
-                    data["data"] = list(
-                        filter(
-                            lambda model: model["id"] in app.state.MODEL_FILTER_LIST,
-                            data["data"],
-                        )
-                    )
-
-            return data
-        except Exception as e:
-
-            log.exception(e)
-            error_detail = "Open WebUI: Server Connection Error"
-            if r is not None:
-                try:
-                    res = r.json()
-                    if "error" in res:
-                        error_detail = f"External: {res['error']}"
-                except:
-                    error_detail = f"External: {e}"
-
-            return {
-                "data": [
-                    {
-                        "id": model["model_name"],
-                        "object": "model",
-                        "created": int(time.time()),
-                        "owned_by": "openai",
-                    }
-                    for model in app.state.CONFIG["model_list"]
-                ],
-                "object": "list",
-            }
-    else:
-        return {
-            "data": [],
-            "object": "list",
-        }
-
-
-@app.get("/model/info")
-async def get_model_list(user=Depends(get_admin_user)):
-    return {"data": app.state.CONFIG["model_list"]}
-
-
-class AddLiteLLMModelForm(BaseModel):
-    model_name: str
-    litellm_params: dict
-
-    model_config = ConfigDict(protected_namespaces=())
-
-
-@app.post("/model/new")
-async def add_model_to_config(
-    form_data: AddLiteLLMModelForm, user=Depends(get_admin_user)
-):
-    try:
-        get_llm_provider(model=form_data.model_name)
-        app.state.CONFIG["model_list"].append(form_data.model_dump())
-
-        with open(LITELLM_CONFIG_DIR, "w") as file:
-            yaml.dump(app.state.CONFIG, file)
-
-        await restart_litellm()
-
-        return {"message": MESSAGES.MODEL_ADDED(form_data.model_name)}
-    except Exception as e:
-        print(e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
-        )
-
-
-class DeleteLiteLLMModelForm(BaseModel):
-    id: str
-
-
-@app.post("/model/delete")
-async def delete_model_from_config(
-    form_data: DeleteLiteLLMModelForm, user=Depends(get_admin_user)
-):
-    app.state.CONFIG["model_list"] = [
-        model
-        for model in app.state.CONFIG["model_list"]
-        if model["model_name"] != form_data.id
-    ]
-
-    with open(LITELLM_CONFIG_DIR, "w") as file:
-        yaml.dump(app.state.CONFIG, file)
-
-    await restart_litellm()
-
-    return {"message": MESSAGES.MODEL_DELETED(form_data.id)}
-
-
-@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
-    body = await request.body()
-
-    url = f"http://localhost:{LITELLM_PROXY_PORT}"
-
-    target_url = f"{url}/{path}"
-
-    headers = {}
-    # headers["Authorization"] = f"Bearer {key}"
-    headers["Content-Type"] = "application/json"
-
-    r = None
-
-    try:
-        r = requests.request(
-            method=request.method,
-            url=target_url,
-            data=body,
-            headers=headers,
-            stream=True,
-        )
-
-        r.raise_for_status()
-
-        # Check if response is SSE
-        if "text/event-stream" in r.headers.get("Content-Type", ""):
-            return StreamingResponse(
-                r.iter_content(chunk_size=8192),
-                status_code=r.status_code,
-                headers=dict(r.headers),
-            )
-        else:
-            response_data = r.json()
-            return response_data
-    except Exception as e:
-        log.exception(e)
-        error_detail = "Open WebUI: Server Connection Error"
-        if r is not None:
-            try:
-                res = r.json()
-                if "error" in res:
-                    error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"
-            except:
-                error_detail = f"External: {e}"
-
-        raise HTTPException(
-            status_code=r.status_code if r else 500, detail=error_detail
-        )
--- a/backend/apps/ollama/main.py
+++ b/backend/apps/ollama/main.py
@@ -29,8 +29,8 @@ import time
 from urllib.parse import urlparse
 from typing import Optional, List, Union

-
-from apps.web.models.users import Users
+from apps.webui.models.models import Models
+from apps.webui.models.users import Users
 from constants import ERROR_MESSAGES
 from utils.utils import (
    decode_token,
@@ -39,6 +39,8 @@ from utils.utils import (
    get_admin_user,
 )

+from utils.models import get_model_id_from_custom_model_id
+

 from config import (
    SRC_LOG_LEVELS,
@@ -68,7 +70,6 @@ app.state.config = AppConfig()
 app.state.config.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER
 app.state.config.MODEL_FILTER_LIST = MODEL_FILTER_LIST

-
 app.state.config.ENABLE_OLLAMA_API = ENABLE_OLLAMA_API
 app.state.config.OLLAMA_BASE_URLS = OLLAMA_BASE_URLS
 app.state.MODELS = {}
@@ -305,6 +306,9 @@ async def pull_model(

    r = None

+    # Admin should be able to pull models from any source
+    payload = {**form_data.model_dump(exclude_none=True), "insecure": True}
+
    def get_request():
        nonlocal url
        nonlocal r
@@ -332,7 +336,7 @@ async def pull_model(
            r = requests.request(
                method="POST",
                url=f"{url}/api/pull",
-                data=form_data.model_dump_json(exclude_none=True).encode(),
+                data=json.dumps(payload),
                stream=True,
            )

@@ -875,14 +879,93 @@ async def generate_chat_completion(
    user=Depends(get_verified_user),
 ):

-    if url_idx == None:
-        model = form_data.model
+    log.debug(
+        "form_data.model_dump_json(exclude_none=True).encode(): {0} ".format(
+            form_data.model_dump_json(exclude_none=True).encode()
+        )
+    )

-        if ":" not in model:
-            model = f"{model}:latest"
+    payload = {
+        **form_data.model_dump(exclude_none=True),
+    }

-        if model in app.state.MODELS:
-            url_idx = random.choice(app.state.MODELS[model]["urls"])
+    model_id = form_data.model
+    model_info = Models.get_model_by_id(model_id)
+
+    if model_info:
+        print(model_info)
+        if model_info.base_model_id:
+            payload["model"] = model_info.base_model_id
+
+        model_info.params = model_info.params.model_dump()
+
+        if model_info.params:
+            payload["options"] = {}
+
+            payload["options"]["mirostat"] = model_info.params.get("mirostat", None)
+            payload["options"]["mirostat_eta"] = model_info.params.get(
+                "mirostat_eta", None
+            )
+            payload["options"]["mirostat_tau"] = model_info.params.get(
+                "mirostat_tau", None
+            )
+            payload["options"]["num_ctx"] = model_info.params.get("num_ctx", None)
+
+            payload["options"]["repeat_last_n"] = model_info.params.get(
+                "repeat_last_n", None
+            )
+            payload["options"]["repeat_penalty"] = model_info.params.get(
+                "frequency_penalty", None
+            )
+
+            payload["options"]["temperature"] = model_info.params.get(
+                "temperature", None
+            )
+            payload["options"]["seed"] = model_info.params.get("seed", None)
+
+            payload["options"]["stop"] = (
+                [
+                    bytes(stop, "utf-8").decode("unicode_escape")
+                    for stop in model_info.params["stop"]
+                ]
+                if model_info.params.get("stop", None)
+                else None
+            )
+
+            payload["options"]["tfs_z"] = model_info.params.get("tfs_z", None)
+
+            payload["options"]["num_predict"] = model_info.params.get(
+                "max_tokens", None
+            )
+            payload["options"]["top_k"] = model_info.params.get("top_k", None)
+
+            payload["options"]["top_p"] = model_info.params.get("top_p", None)
+
+        if model_info.params.get("system", None):
+            # Check if the payload already has a system message
+            # If not, add a system message to the payload
+            if payload.get("messages"):
+                for message in payload["messages"]:
+                    if message.get("role") == "system":
+                        message["content"] = (
+                            model_info.params.get("system", None) + message["content"]
+                        )
+                        break
+                else:
+                    payload["messages"].insert(
+                        0,
+                        {
+                            "role": "system",
+                            "content": model_info.params.get("system", None),
+                        },
+                    )
+
+    if url_idx == None:
+        if ":" not in payload["model"]:
+            payload["model"] = f"{payload['model']}:latest"
+
+        if payload["model"] in app.state.MODELS:
+            url_idx = random.choice(app.state.MODELS[payload["model"]]["urls"])
        else:
            raise HTTPException(
                status_code=400,
@@ -892,16 +975,12 @@ async def generate_chat_completion(
    url = app.state.config.OLLAMA_BASE_URLS[url_idx]
    log.info(f"url: {url}")

-    r = None
+    print(payload)

-    log.debug(
-        "form_data.model_dump_json(exclude_none=True).encode(): {0} ".format(
-            form_data.model_dump_json(exclude_none=True).encode()
-        )
-    )
+    r = None

    def get_request():
-        nonlocal form_data
+        nonlocal payload
        nonlocal r

        request_id = str(uuid.uuid4())
@@ -910,7 +989,7 @@ async def generate_chat_completion(

            def stream_content():
                try:
-                    if form_data.stream:
+                    if payload.get("stream", None):
                        yield json.dumps({"id": request_id, "done": False}) + "\n"

                    for chunk in r.iter_content(chunk_size=8192):
@@ -928,7 +1007,7 @@ async def generate_chat_completion(
            r = requests.request(
                method="POST",
                url=f"{url}/api/chat",
-                data=form_data.model_dump_json(exclude_none=True).encode(),
+                data=json.dumps(payload),
                stream=True,
            )

@@ -984,14 +1063,62 @@ async def generate_openai_chat_completion(
    user=Depends(get_verified_user),
 ):

-    if url_idx == None:
-        model = form_data.model
+    payload = {
+        **form_data.model_dump(exclude_none=True),
+    }

-        if ":" not in model:
-            model = f"{model}:latest"
+    model_id = form_data.model
+    model_info = Models.get_model_by_id(model_id)

-        if model in app.state.MODELS:
-            url_idx = random.choice(app.state.MODELS[model]["urls"])
+    if model_info:
+        print(model_info)
+        if model_info.base_model_id:
+            payload["model"] = model_info.base_model_id
+
+        model_info.params = model_info.params.model_dump()
+
+        if model_info.params:
+            payload["temperature"] = model_info.params.get("temperature", None)
+            payload["top_p"] = model_info.params.get("top_p", None)
+            payload["max_tokens"] = model_info.params.get("max_tokens", None)
+            payload["frequency_penalty"] = model_info.params.get(
+                "frequency_penalty", None
+            )
+            payload["seed"] = model_info.params.get("seed", None)
+            payload["stop"] = (
+                [
+                    bytes(stop, "utf-8").decode("unicode_escape")
+                    for stop in model_info.params["stop"]
+                ]
+                if model_info.params.get("stop", None)
+                else None
+            )
+
+        if model_info.params.get("system", None):
+            # Check if the payload already has a system message
+            # If not, add a system message to the payload
+            if payload.get("messages"):
+                for message in payload["messages"]:
+                    if message.get("role") == "system":
+                        message["content"] = (
+                            model_info.params.get("system", None) + message["content"]
+                        )
+                        break
+                else:
+                    payload["messages"].insert(
+                        0,
+                        {
+                            "role": "system",
+                            "content": model_info.params.get("system", None),
+                        },
+                    )
+
+    if url_idx == None:
+        if ":" not in payload["model"]:
+            payload["model"] = f"{payload['model']}:latest"
+
+        if payload["model"] in app.state.MODELS:
+            url_idx = random.choice(app.state.MODELS[payload["model"]]["urls"])
        else:
            raise HTTPException(
                status_code=400,
@@ -1004,7 +1131,7 @@ async def generate_openai_chat_completion(
    r = None

    def get_request():
-        nonlocal form_data
+        nonlocal payload
        nonlocal r

        request_id = str(uuid.uuid4())
@@ -1013,7 +1140,7 @@ async def generate_openai_chat_completion(

            def stream_content():
                try:
-                    if form_data.stream:
+                    if payload.get("stream"):
                        yield json.dumps(
                            {"request_id": request_id, "done": False}
                        ) + "\n"
@@ -1033,7 +1160,7 @@ async def generate_openai_chat_completion(
            r = requests.request(
                method="POST",
                url=f"{url}/v1/chat/completions",
-                data=form_data.model_dump_json(exclude_none=True).encode(),
+                data=json.dumps(payload),
                stream=True,
            )


--- a/backend/apps/openai/main.py
+++ b/backend/apps/openai/main.py
@@ -10,8 +10,8 @@ import logging

 from pydantic import BaseModel

-
-from apps.web.models.users import Users
+from apps.webui.models.models import Models
+from apps.webui.models.users import Users
 from constants import ERROR_MESSAGES
 from utils.utils import (
    decode_token,
@@ -53,7 +53,6 @@ app.state.config = AppConfig()
 app.state.config.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER
 app.state.config.MODEL_FILTER_LIST = MODEL_FILTER_LIST

-
 app.state.config.ENABLE_OPENAI_API = ENABLE_OPENAI_API
 app.state.config.OPENAI_API_BASE_URLS = OPENAI_API_BASE_URLS
 app.state.config.OPENAI_API_KEYS = OPENAI_API_KEYS
@@ -206,7 +205,13 @@ def merge_models_lists(model_lists):
        if models is not None and "error" not in models:
            merged_list.extend(
                [
-                    {**model, "urlIdx": idx}
+                    {
+                        **model,
+                        "name": model.get("name", model["id"]),
+                        "owned_by": "openai",
+                        "openai": model,
+                        "urlIdx": idx,
+                    }
                    for model in models
                    if "api.openai.com"
                    not in app.state.config.OPENAI_API_BASE_URLS[idx]
@@ -310,39 +315,93 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
    body = await request.body()
    # TODO: Remove below after gpt-4-vision fix from Open AI
    # Try to decode the body of the request from bytes to a UTF-8 string (Require add max_token to fix gpt-4-vision)
+
+    payload = None
+
    try:
+        if "chat/completions" in path:
            body = body.decode("utf-8")
            body = json.loads(body)

-        model = app.state.MODELS[body.get("model")]
+            payload = {**body}
+
+            model_id = body.get("model")
+            model_info = Models.get_model_by_id(model_id)
+
+            if model_info:
+                print(model_info)
+                if model_info.base_model_id:
+                    payload["model"] = model_info.base_model_id
+
+                model_info.params = model_info.params.model_dump()
+
+                if model_info.params:
+                    payload["temperature"] = model_info.params.get("temperature", None)
+                    payload["top_p"] = model_info.params.get("top_p", None)
+                    payload["max_tokens"] = model_info.params.get("max_tokens", None)
+                    payload["frequency_penalty"] = model_info.params.get(
+                        "frequency_penalty", None
+                    )
+                    payload["seed"] = model_info.params.get("seed", None)
+                    payload["stop"] = (
+                        [
+                            bytes(stop, "utf-8").decode("unicode_escape")
+                            for stop in model_info.params["stop"]
+                        ]
+                        if model_info.params.get("stop", None)
+                        else None
+                    )
+
+                if model_info.params.get("system", None):
+                    # Check if the payload already has a system message
+                    # If not, add a system message to the payload
+                    if payload.get("messages"):
+                        for message in payload["messages"]:
+                            if message.get("role") == "system":
+                                message["content"] = (
+                                    model_info.params.get("system", None)
+                                    + message["content"]
+                                )
+                                break
+                        else:
+                            payload["messages"].insert(
+                                0,
+                                {
+                                    "role": "system",
+                                    "content": model_info.params.get("system", None),
+                                },
+                            )
+            else:
+                pass
+
+            print(app.state.MODELS)
+            model = app.state.MODELS[payload.get("model")]

            idx = model["urlIdx"]

            if "pipeline" in model and model.get("pipeline"):
-            body["user"] = {"name": user.name, "id": user.id}
-            body["title"] = (
-                True if body["stream"] == False and body["max_tokens"] == 50 else False
+                payload["user"] = {"name": user.name, "id": user.id}
+                payload["title"] = (
+                    True
+                    if payload["stream"] == False and payload["max_tokens"] == 50
+                    else False
                )

            # Check if the model is "gpt-4-vision-preview" and set "max_tokens" to 4000
            # This is a workaround until OpenAI fixes the issue with this model
-        if body.get("model") == "gpt-4-vision-preview":
-            if "max_tokens" not in body:
-                body["max_tokens"] = 4000
-            log.debug("Modified body_dict:", body)
-
-        # Fix for ChatGPT calls failing because the num_ctx key is in body
-        if "num_ctx" in body:
-            # If 'num_ctx' is in the dictionary, delete it
-            # Leaving it there generates an error with the
-            # OpenAI API (Feb 2024)
-            del body["num_ctx"]
+            if payload.get("model") == "gpt-4-vision-preview":
+                if "max_tokens" not in payload:
+                    payload["max_tokens"] = 4000
+                log.debug("Modified payload:", payload)

            # Convert the modified body back to JSON
-        body = json.dumps(body)
+            payload = json.dumps(payload)
+
    except json.JSONDecodeError as e:
        log.error("Error loading request body into a dictionary:", e)

+    print(payload)
+
    url = app.state.config.OPENAI_API_BASE_URLS[idx]
    key = app.state.config.OPENAI_API_KEYS[idx]

@@ -361,7 +420,7 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
        r = requests.request(
            method=request.method,
            url=target_url,
-            data=body,
+            data=payload if payload else body,
            headers=headers,
            stream=True,
        )

--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -46,7 +46,7 @@ import json

 import sentence_transformers

-from apps.web.models.documents import (
+from apps.webui.models.documents import (
    Documents,
    DocumentForm,
    DocumentResponse,

--- a/backend/apps/web/models/modelfiles.py
+++ b/backend/apps/web/models/modelfiles.py
-from pydantic import BaseModel
-from peewee import *
-from playhouse.shortcuts import model_to_dict
-from typing import List, Union, Optional
-import time
-
-from utils.utils import decode_token
-from utils.misc import get_gravatar_url
-
-from apps.web.internal.db import DB
-
-import json
-
-####################
-# Modelfile DB Schema
-####################
-
-
-class Modelfile(Model):
-    tag_name = CharField(unique=True)
-    user_id = CharField()
-    modelfile = TextField()
-    timestamp = BigIntegerField()
-
-    class Meta:
-        database = DB
-
-
-class ModelfileModel(BaseModel):
-    tag_name: str
-    user_id: str
-    modelfile: str
-    timestamp: int  # timestamp in epoch
-
-
-####################
-# Forms
-####################
-
-
-class ModelfileForm(BaseModel):
-    modelfile: dict
-
-
-class ModelfileTagNameForm(BaseModel):
-    tag_name: str
-
-
-class ModelfileUpdateForm(ModelfileForm, ModelfileTagNameForm):
-    pass
-
-
-class ModelfileResponse(BaseModel):
-    tag_name: str
-    user_id: str
-    modelfile: dict
-    timestamp: int  # timestamp in epoch
-
-
-class ModelfilesTable:
-
-    def __init__(self, db):
-        self.db = db
-        self.db.create_tables([Modelfile])
-
-    def insert_new_modelfile(
-        self, user_id: str, form_data: ModelfileForm
-    ) -> Optional[ModelfileModel]:
-        if "tagName" in form_data.modelfile:
-            modelfile = ModelfileModel(
-                **{
-                    "user_id": user_id,
-                    "tag_name": form_data.modelfile["tagName"],
-                    "modelfile": json.dumps(form_data.modelfile),
-                    "timestamp": int(time.time()),
-                }
-            )
-
-            try:
-                result = Modelfile.create(**modelfile.model_dump())
-                if result:
-                    return modelfile
-                else:
-                    return None
-            except:
-                return None
-
-        else:
-            return None
-
-    def get_modelfile_by_tag_name(self, tag_name: str) -> Optional[ModelfileModel]:
-        try:
-            modelfile = Modelfile.get(Modelfile.tag_name == tag_name)
-            return ModelfileModel(**model_to_dict(modelfile))
-        except:
-            return None
-
-    def get_modelfiles(self, skip: int = 0, limit: int = 50) -> List[ModelfileResponse]:
-        return [
-            ModelfileResponse(
-                **{
-                    **model_to_dict(modelfile),
-                    "modelfile": json.loads(modelfile.modelfile),
-                }
-            )
-            for modelfile in Modelfile.select()
-            # .limit(limit).offset(skip)
-        ]
-
-    def update_modelfile_by_tag_name(
-        self, tag_name: str, modelfile: dict
-    ) -> Optional[ModelfileModel]:
-        try:
-            query = Modelfile.update(
-                modelfile=json.dumps(modelfile),
-                timestamp=int(time.time()),
-            ).where(Modelfile.tag_name == tag_name)
-
-            query.execute()
-
-            modelfile = Modelfile.get(Modelfile.tag_name == tag_name)
-            return ModelfileModel(**model_to_dict(modelfile))
-        except:
-            return None
-
-    def delete_modelfile_by_tag_name(self, tag_name: str) -> bool:
-        try:
-            query = Modelfile.delete().where((Modelfile.tag_name == tag_name))
-            query.execute()  # Remove the rows, return number of rows removed.
-
-            return True
-        except:
-            return False
-
-
-Modelfiles = ModelfilesTable(DB)
--- a/backend/apps/web/routers/modelfiles.py
+++ b/backend/apps/web/routers/modelfiles.py
-from fastapi import Depends, FastAPI, HTTPException, status
-from datetime import datetime, timedelta
-from typing import List, Union, Optional
-
-from fastapi import APIRouter
-from pydantic import BaseModel
-import json
-from apps.web.models.modelfiles import (
-    Modelfiles,
-    ModelfileForm,
-    ModelfileTagNameForm,
-    ModelfileUpdateForm,
-    ModelfileResponse,
-)
-
-from utils.utils import get_current_user, get_admin_user
-from constants import ERROR_MESSAGES
-
-router = APIRouter()
-
-############################
-# GetModelfiles
-############################
-
-
-@router.get("/", response_model=List[ModelfileResponse])
-async def get_modelfiles(
-    skip: int = 0, limit: int = 50, user=Depends(get_current_user)
-):
-    return Modelfiles.get_modelfiles(skip, limit)
-
-
-############################
-# CreateNewModelfile
-############################
-
-
-@router.post("/create", response_model=Optional[ModelfileResponse])
-async def create_new_modelfile(form_data: ModelfileForm, user=Depends(get_admin_user)):
-    modelfile = Modelfiles.insert_new_modelfile(user.id, form_data)
-
-    if modelfile:
-        return ModelfileResponse(
-            **{
-                **modelfile.model_dump(),
-                "modelfile": json.loads(modelfile.modelfile),
-            }
-        )
-    else:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail=ERROR_MESSAGES.DEFAULT(),
-        )
-
-
-############################
-# GetModelfileByTagName
-############################
-
-
-@router.post("/", response_model=Optional[ModelfileResponse])
-async def get_modelfile_by_tag_name(
-    form_data: ModelfileTagNameForm, user=Depends(get_current_user)
-):
-    modelfile = Modelfiles.get_modelfile_by_tag_name(form_data.tag_name)
-
-    if modelfile:
-        return ModelfileResponse(
-            **{
-                **modelfile.model_dump(),
-                "modelfile": json.loads(modelfile.modelfile),
-            }
-        )
-    else:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail=ERROR_MESSAGES.NOT_FOUND,
-        )
-
-
-############################
-# UpdateModelfileByTagName
-############################
-
-
-@router.post("/update", response_model=Optional[ModelfileResponse])
-async def update_modelfile_by_tag_name(
-    form_data: ModelfileUpdateForm, user=Depends(get_admin_user)
-):
-    modelfile = Modelfiles.get_modelfile_by_tag_name(form_data.tag_name)
-    if modelfile:
-        updated_modelfile = {
-            **json.loads(modelfile.modelfile),
-            **form_data.modelfile,
-        }
-
-        modelfile = Modelfiles.update_modelfile_by_tag_name(
-            form_data.tag_name, updated_modelfile
-        )
-
-        return ModelfileResponse(
-            **{
-                **modelfile.model_dump(),
-                "modelfile": json.loads(modelfile.modelfile),
-            }
-        )
-    else:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
-        )
-
-
-############################
-# DeleteModelfileByTagName
-############################
-
-
-@router.delete("/delete", response_model=bool)
-async def delete_modelfile_by_tag_name(
-    form_data: ModelfileTagNameForm, user=Depends(get_admin_user)
-):
-    result = Modelfiles.delete_modelfile_by_tag_name(form_data.tag_name)
-    return result
--- a/backend/apps/web/internal/db.py
+++ b/backend/apps/web/internal/db.py
+import json
+
 from peewee import *
 from peewee_migrate import Router
 from playhouse.db_url import connect
@@ -8,6 +10,16 @@ import logging
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["DB"])

+
+class JSONField(TextField):
+    def db_value(self, value):
+        return json.dumps(value)
+
+    def python_value(self, value):
+        if value is not None:
+            return json.loads(value)
+
+
 # Check if the file exists
 if os.path.exists(f"{DATA_DIR}/ollama.db"):
    # Rename the file
@@ -19,7 +31,9 @@ else:
 DB = connect(DATABASE_URL)
 log.info(f"Connected to a {DB.__class__.__name__} database.")
 router = Router(
-    DB, migrate_dir=BACKEND_DIR / "apps" / "web" / "internal" / "migrations", logger=log
+    DB,
+    migrate_dir=BACKEND_DIR / "apps" / "webui" / "internal" / "migrations",
+    logger=log,
 )
 router.run()
 DB.connect(reuse_if_open=True)
--- a/backend/apps/web/internal/migrations/001_initial_schema.py
+++ b/backend/apps/web/internal/migrations/001_initial_schema.py
--- a/backend/apps/web/internal/migrations/002_add_local_sharing.py
+++ b/backend/apps/web/internal/migrations/002_add_local_sharing.py
--- a/backend/apps/web/internal/migrations/003_add_auth_api_key.py
+++ b/backend/apps/web/internal/migrations/003_add_auth_api_key.py
--- a/backend/apps/web/internal/migrations/004_add_archived.py
+++ b/backend/apps/web/internal/migrations/004_add_archived.py
--- a/backend/apps/web/internal/migrations/005_add_updated_at.py
+++ b/backend/apps/web/internal/migrations/005_add_updated_at.py
--- a/backend/apps/web/internal/migrations/006_migrate_timestamps_and_charfields.py
+++ b/backend/apps/web/internal/migrations/006_migrate_timestamps_and_charfields.py
--- a/backend/apps/web/internal/migrations/007_add_user_last_active_at.py
+++ b/backend/apps/web/internal/migrations/007_add_user_last_active_at.py
--- a/backend/apps/web/internal/migrations/008_add_memory.py
+++ b/backend/apps/web/internal/migrations/008_add_memory.py
--- a/backend/apps/webui/internal/migrations/009_add_models.py
+++ b/backend/apps/webui/internal/migrations/009_add_models.py
+"""Peewee migrations -- 009_add_models.py.
+
+Some examples (model - class or model name)::
+
+    > Model = migrator.orm['table_name']            # Return model in current state by name
+    > Model = migrator.ModelClass                   # Return model in current state by name
+
+    > migrator.sql(sql)                             # Run custom SQL
+    > migrator.run(func, *args, **kwargs)           # Run python function with the given args
+    > migrator.create_model(Model)                  # Create a model (could be used as decorator)
+    > migrator.remove_model(model, cascade=True)    # Remove a model
+    > migrator.add_fields(model, **fields)          # Add fields to a model
+    > migrator.change_fields(model, **fields)       # Change fields
+    > migrator.remove_fields(model, *field_names, cascade=True)
+    > migrator.rename_field(model, old_field_name, new_field_name)
+    > migrator.rename_table(model, new_table_name)
+    > migrator.add_index(model, *col_names, unique=False)
+    > migrator.add_not_null(model, *field_names)
+    > migrator.add_default(model, field_name, default)
+    > migrator.add_constraint(model, name, sql)
+    > migrator.drop_index(model, *col_names)
+    > migrator.drop_not_null(model, *field_names)
+    > migrator.drop_constraints(model, *constraints)
+
+"""
+
+from contextlib import suppress
+
+import peewee as pw
+from peewee_migrate import Migrator
+
+
+with suppress(ImportError):
+    import playhouse.postgres_ext as pw_pext
+
+
+def migrate(migrator: Migrator, database: pw.Database, *, fake=False):
+    """Write your migrations here."""
+
+    @migrator.create_model
+    class Model(pw.Model):
+        id = pw.TextField(unique=True)
+        user_id = pw.TextField()
+        base_model_id = pw.TextField(null=True)
+
+        name = pw.TextField()
+
+        meta = pw.TextField()
+        params = pw.TextField()
+
+        created_at = pw.BigIntegerField(null=False)
+        updated_at = pw.BigIntegerField(null=False)
+
+        class Meta:
+            table_name = "model"
+
+
+def rollback(migrator: Migrator, database: pw.Database, *, fake=False):
+    """Write your rollback migrations here."""
+
+    migrator.remove_model("model")
--- a/backend/apps/webui/internal/migrations/010_migrate_modelfiles_to_models.py
+++ b/backend/apps/webui/internal/migrations/010_migrate_modelfiles_to_models.py
+"""Peewee migrations -- 009_add_models.py.
+
+Some examples (model - class or model name)::
+
+    > Model = migrator.orm['table_name']            # Return model in current state by name
+    > Model = migrator.ModelClass                   # Return model in current state by name
+
+    > migrator.sql(sql)                             # Run custom SQL
+    > migrator.run(func, *args, **kwargs)           # Run python function with the given args
+    > migrator.create_model(Model)                  # Create a model (could be used as decorator)
+    > migrator.remove_model(model, cascade=True)    # Remove a model
+    > migrator.add_fields(model, **fields)          # Add fields to a model
+    > migrator.change_fields(model, **fields)       # Change fields
+    > migrator.remove_fields(model, *field_names, cascade=True)
+    > migrator.rename_field(model, old_field_name, new_field_name)
+    > migrator.rename_table(model, new_table_name)
+    > migrator.add_index(model, *col_names, unique=False)
+    > migrator.add_not_null(model, *field_names)
+    > migrator.add_default(model, field_name, default)
+    > migrator.add_constraint(model, name, sql)
+    > migrator.drop_index(model, *col_names)
+    > migrator.drop_not_null(model, *field_names)
+    > migrator.drop_constraints(model, *constraints)
+
+"""
+
+from contextlib import suppress
+
+import peewee as pw
+from peewee_migrate import Migrator
+import json
+
+from utils.misc import parse_ollama_modelfile
+
+with suppress(ImportError):
+    import playhouse.postgres_ext as pw_pext
+
+
+def migrate(migrator: Migrator, database: pw.Database, *, fake=False):
+    """Write your migrations here."""
+
+    # Fetch data from 'modelfile' table and insert into 'model' table
+    migrate_modelfile_to_model(migrator, database)
+    # Drop the 'modelfile' table
+    migrator.remove_model("modelfile")
+
+
+def migrate_modelfile_to_model(migrator: Migrator, database: pw.Database):
+    ModelFile = migrator.orm["modelfile"]
+    Model = migrator.orm["model"]
+
+    modelfiles = ModelFile.select()
+
+    for modelfile in modelfiles:
+        # Extract and transform data in Python
+
+        modelfile.modelfile = json.loads(modelfile.modelfile)
+        meta = json.dumps(
+            {
+                "description": modelfile.modelfile.get("desc"),
+                "profile_image_url": modelfile.modelfile.get("imageUrl"),
+                "ollama": {"modelfile": modelfile.modelfile.get("content")},
+                "suggestion_prompts": modelfile.modelfile.get("suggestionPrompts"),
+                "categories": modelfile.modelfile.get("categories"),
+                "user": {**modelfile.modelfile.get("user", {}), "community": True},
+            }
+        )
+
+        info = parse_ollama_modelfile(modelfile.modelfile.get("content"))
+
+        # Insert the processed data into the 'model' table
+        Model.create(
+            id=f"ollama-{modelfile.tag_name}",
+            user_id=modelfile.user_id,
+            base_model_id=info.get("base_model_id"),
+            name=modelfile.modelfile.get("title"),
+            meta=meta,
+            params=json.dumps(info.get("params", {})),
+            created_at=modelfile.timestamp,
+            updated_at=modelfile.timestamp,
+        )
+
+
+def rollback(migrator: Migrator, database: pw.Database, *, fake=False):
+    """Write your rollback migrations here."""
+
+    recreate_modelfile_table(migrator, database)
+    move_data_back_to_modelfile(migrator, database)
+    migrator.remove_model("model")
+
+
+def recreate_modelfile_table(migrator: Migrator, database: pw.Database):
+    query = """
+    CREATE TABLE IF NOT EXISTS modelfile (
+        user_id TEXT,
+        tag_name TEXT,
+        modelfile JSON,
+        timestamp BIGINT
+    )
+    """
+    migrator.sql(query)
+
+
+def move_data_back_to_modelfile(migrator: Migrator, database: pw.Database):
+    Model = migrator.orm["model"]
+    Modelfile = migrator.orm["modelfile"]
+
+    models = Model.select()
+
+    for model in models:
+        # Extract and transform data in Python
+        meta = json.loads(model.meta)
+
+        modelfile_data = {
+            "title": model.name,
+            "desc": meta.get("description"),
+            "imageUrl": meta.get("profile_image_url"),
+            "content": meta.get("ollama", {}).get("modelfile"),
+            "suggestionPrompts": meta.get("suggestion_prompts"),
+            "categories": meta.get("categories"),
+            "user": {k: v for k, v in meta.get("user", {}).items() if k != "community"},
+        }
+
+        # Insert the processed data back into the 'modelfile' table
+        Modelfile.create(
+            user_id=model.user_id,
+            tag_name=model.id,
+            modelfile=modelfile_data,
+            timestamp=model.created_at,
+        )
--- a/backend/apps/web/internal/migrations/README.md
+++ b/backend/apps/web/internal/migrations/README.md
@@ -14,7 +14,7 @@ You will need to create a migration file to ensure that existing databases are u
 2. Make your changes to the models.
 3. From the `backend` directory, run the following command:
   ```bash
-   pw_migrate create --auto --auto-source apps.web.models --database sqlite:///${SQLITE_DB} --directory apps/web/internal/migrations ${MIGRATION_NAME}
+   pw_migrate create --auto --auto-source apps.webui.models --database sqlite:///${SQLITE_DB} --directory apps/web/internal/migrations ${MIGRATION_NAME}
   ```
   - `$SQLITE_DB` should be the path to the database file.
   - `$MIGRATION_NAME` should be a descriptive name for the migration.