Merge remote-tracking branch 'upstream/dev' into feat/add-i18n

25e0f0de · Ased Mammad · df8aeb39 · 3455f899 · 25e0f0de · 25e0f0de
Commit 25e0f0de authored Mar 06, 2024 by Ased Mammad
20 changed files
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -32,7 +32,7 @@ assignees: ''
 **Confirmation:**

 - [ ] I have read and followed all the instructions provided in the README.md.
- [ ] I have reviewed the troubleshooting.md document.
+- [ ] I am on the latest version of both Open WebUI and Ollama.
 - [ ] I have included the browser console logs.
 - [ ] I have included the Docker container logs.


--- a/backend/apps/ollama/main.py
+++ b/backend/apps/ollama/main.py
@@ -3,16 +3,23 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse
 from fastapi.concurrency import run_in_threadpool

+from pydantic import BaseModel, ConfigDict
+
+import random
 import requests
 import json
 import uuid
-from pydantic import BaseModel
+import aiohttp
+import asyncio

 from apps.web.models.users import Users
 from constants import ERROR_MESSAGES
 from utils.utils import decode_token, get_current_user, get_admin_user
 from config import OLLAMA_BASE_URL, WEBUI_AUTH

+from typing import Optional, List, Union
+
+
 app = FastAPI()
 app.add_middleware(
    CORSMiddleware,
@@ -23,26 +30,44 @@ app.add_middleware(
 )

 app.state.OLLAMA_BASE_URL = OLLAMA_BASE_URL
-
-# TARGET_SERVER_URL = OLLAMA_API_BASE_URL
+app.state.OLLAMA_BASE_URLS = [OLLAMA_BASE_URL]
+app.state.MODELS = {}


 REQUEST_POOL = []


-@app.get("/url")
-async def get_ollama_api_url(user=Depends(get_admin_user)):
-    return {"OLLAMA_BASE_URL": app.state.OLLAMA_BASE_URL}
+# TODO: Implement a more intelligent load balancing mechanism for distributing requests among multiple backend instances.
+# Current implementation uses a simple round-robin approach (random.choice). Consider incorporating algorithms like weighted round-robin,
+# least connections, or least response time for better resource utilization and performance optimization.
+
+
+@app.middleware("http")
+async def check_url(request: Request, call_next):
+    if len(app.state.MODELS) == 0:
+        await get_all_models()
+    else:
+        pass
+
+    response = await call_next(request)
+    return response
+
+
+@app.get("/urls")
+async def get_ollama_api_urls(user=Depends(get_admin_user)):
+    return {"OLLAMA_BASE_URLS": app.state.OLLAMA_BASE_URLS}


 class UrlUpdateForm(BaseModel):
-    url: str
+    urls: List[str]


-@app.post("/url/update")
+@app.post("/urls/update")
 async def update_ollama_api_url(form_data: UrlUpdateForm, user=Depends(get_admin_user)):
-    app.state.OLLAMA_BASE_URL = form_data.url
-    return {"OLLAMA_BASE_URL": app.state.OLLAMA_BASE_URL}
+    app.state.OLLAMA_BASE_URLS = form_data.urls
+
+    print(app.state.OLLAMA_BASE_URLS)
+    return {"OLLAMA_BASE_URLS": app.state.OLLAMA_BASE_URLS}


 @app.get("/cancel/{request_id}")
@@ -55,9 +80,806 @@ async def cancel_ollama_request(request_id: str, user=Depends(get_current_user))
        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)


+async def fetch_url(url):
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url) as response:
+                return await response.json()
+    except Exception as e:
+        # Handle connection error here
+        print(f"Connection error: {e}")
+        return None
+
+
+def merge_models_lists(model_lists):
+    merged_models = {}
+
+    for idx, model_list in enumerate(model_lists):
+        for model in model_list:
+            digest = model["digest"]
+            if digest not in merged_models:
+                model["urls"] = [idx]
+                merged_models[digest] = model
+            else:
+                merged_models[digest]["urls"].append(idx)
+
+    return list(merged_models.values())
+
+
+# user=Depends(get_current_user)
+
+
+async def get_all_models():
+    print("get_all_models")
+    tasks = [fetch_url(f"{url}/api/tags") for url in app.state.OLLAMA_BASE_URLS]
+    responses = await asyncio.gather(*tasks)
+    responses = list(filter(lambda x: x is not None, responses))
+
+    models = {
+        "models": merge_models_lists(
+            map(lambda response: response["models"], responses)
+        )
+    }
+    app.state.MODELS = {model["model"]: model for model in models["models"]}
+
+    return models
+
+
+@app.get("/api/tags")
+@app.get("/api/tags/{url_idx}")
+async def get_ollama_tags(
+    url_idx: Optional[int] = None, user=Depends(get_current_user)
+):
+
+    if url_idx == None:
+        return await get_all_models()
+    else:
+        url = app.state.OLLAMA_BASE_URLS[url_idx]
+        try:
+            r = requests.request(method="GET", url=f"{url}/api/tags")
+            r.raise_for_status()
+
+            return r.json()
+        except Exception as e:
+            print(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            if r is not None:
+                try:
+                    res = r.json()
+                    if "error" in res:
+                        error_detail = f"Ollama: {res['error']}"
+                except:
+                    error_detail = f"Ollama: {e}"
+
+            raise HTTPException(
+                status_code=r.status_code if r else 500,
+                detail=error_detail,
+            )
+
+
+@app.get("/api/version")
+@app.get("/api/version/{url_idx}")
+async def get_ollama_versions(url_idx: Optional[int] = None):
+
+    if url_idx == None:
+
+        # returns lowest version
+        tasks = [fetch_url(f"{url}/api/version") for url in app.state.OLLAMA_BASE_URLS]
+        responses = await asyncio.gather(*tasks)
+        responses = list(filter(lambda x: x is not None, responses))
+
+        lowest_version = min(
+            responses, key=lambda x: tuple(map(int, x["version"].split(".")))
+        )
+
+        return {"version": lowest_version["version"]}
+    else:
+        url = app.state.OLLAMA_BASE_URLS[url_idx]
+        try:
+            r = requests.request(method="GET", url=f"{url}/api/version")
+            r.raise_for_status()
+
+            return r.json()
+        except Exception as e:
+            print(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            if r is not None:
+                try:
+                    res = r.json()
+                    if "error" in res:
+                        error_detail = f"Ollama: {res['error']}"
+                except:
+                    error_detail = f"Ollama: {e}"
+
+            raise HTTPException(
+                status_code=r.status_code if r else 500,
+                detail=error_detail,
+            )
+
+
+class ModelNameForm(BaseModel):
+    name: str
+
+
+@app.post("/api/pull")
+@app.post("/api/pull/{url_idx}")
+async def pull_model(
+    form_data: ModelNameForm, url_idx: int = 0, user=Depends(get_admin_user)
+):
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    r = None
+
+    def get_request():
+        nonlocal url
+        nonlocal r
+        try:
+
+            def stream_content():
+                for chunk in r.iter_content(chunk_size=8192):
+                    yield chunk
+
+            r = requests.request(
+                method="POST",
+                url=f"{url}/api/pull",
+                data=form_data.model_dump_json(exclude_none=True),
+                stream=True,
+            )
+
+            r.raise_for_status()
+
+            return StreamingResponse(
+                stream_content(),
+                status_code=r.status_code,
+                headers=dict(r.headers),
+            )
+        except Exception as e:
+            raise e
+
+    try:
+        return await run_in_threadpool(get_request)
+    except Exception as e:
+        print(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+class PushModelForm(BaseModel):
+    name: str
+    insecure: Optional[bool] = None
+    stream: Optional[bool] = None
+
+
+@app.delete("/api/push")
+@app.delete("/api/push/{url_idx}")
+async def push_model(
+    form_data: PushModelForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_admin_user),
+):
+    if url_idx == None:
+        if form_data.name in app.state.MODELS:
+            url_idx = app.state.MODELS[form_data.name]["urls"][0]
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+            )
+
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    r = None
+
+    def get_request():
+        nonlocal url
+        nonlocal r
+        try:
+
+            def stream_content():
+                for chunk in r.iter_content(chunk_size=8192):
+                    yield chunk
+
+            r = requests.request(
+                method="POST",
+                url=f"{url}/api/push",
+                data=form_data.model_dump_json(exclude_none=True),
+            )
+
+            r.raise_for_status()
+
+            return StreamingResponse(
+                stream_content(),
+                status_code=r.status_code,
+                headers=dict(r.headers),
+            )
+        except Exception as e:
+            raise e
+
+    try:
+        return await run_in_threadpool(get_request)
+    except Exception as e:
+        print(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+class CreateModelForm(BaseModel):
+    name: str
+    modelfile: Optional[str] = None
+    stream: Optional[bool] = None
+    path: Optional[str] = None
+
+
+@app.post("/api/create")
+@app.post("/api/create/{url_idx}")
+async def create_model(
+    form_data: CreateModelForm, url_idx: int = 0, user=Depends(get_admin_user)
+):
+    print(form_data)
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    r = None
+
+    def get_request():
+        nonlocal url
+        nonlocal r
+        try:
+
+            def stream_content():
+                for chunk in r.iter_content(chunk_size=8192):
+                    yield chunk
+
+            r = requests.request(
+                method="POST",
+                url=f"{url}/api/create",
+                data=form_data.model_dump_json(exclude_none=True),
+                stream=True,
+            )
+
+            r.raise_for_status()
+
+            print(r)
+
+            return StreamingResponse(
+                stream_content(),
+                status_code=r.status_code,
+                headers=dict(r.headers),
+            )
+        except Exception as e:
+            raise e
+
+    try:
+        return await run_in_threadpool(get_request)
+    except Exception as e:
+        print(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+class CopyModelForm(BaseModel):
+    source: str
+    destination: str
+
+
+@app.post("/api/copy")
+@app.post("/api/copy/{url_idx}")
+async def copy_model(
+    form_data: CopyModelForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_admin_user),
+):
+    if url_idx == None:
+        if form_data.source in app.state.MODELS:
+            url_idx = app.state.MODELS[form_data.source]["urls"][0]
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.source),
+            )
+
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    try:
+        r = requests.request(
+            method="POST",
+            url=f"{url}/api/copy",
+            data=form_data.model_dump_json(exclude_none=True),
+        )
+        r.raise_for_status()
+
+        print(r.text)
+
+        return True
+    except Exception as e:
+        print(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+@app.delete("/api/delete")
+@app.delete("/api/delete/{url_idx}")
+async def delete_model(
+    form_data: ModelNameForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_admin_user),
+):
+    if url_idx == None:
+        if form_data.name in app.state.MODELS:
+            url_idx = app.state.MODELS[form_data.name]["urls"][0]
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+            )
+
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    try:
+        r = requests.request(
+            method="DELETE",
+            url=f"{url}/api/delete",
+            data=form_data.model_dump_json(exclude_none=True),
+        )
+        r.raise_for_status()
+
+        print(r.text)
+
+        return True
+    except Exception as e:
+        print(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+@app.post("/api/show")
+async def show_model_info(form_data: ModelNameForm, user=Depends(get_current_user)):
+    if form_data.name not in app.state.MODELS:
+        raise HTTPException(
+            status_code=400,
+            detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+        )
+
+    url_idx = random.choice(app.state.MODELS[form_data.name]["urls"])
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    try:
+        r = requests.request(
+            method="POST",
+            url=f"{url}/api/show",
+            data=form_data.model_dump_json(exclude_none=True),
+        )
+        r.raise_for_status()
+
+        return r.json()
+    except Exception as e:
+        print(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+class GenerateEmbeddingsForm(BaseModel):
+    model: str
+    prompt: str
+    options: Optional[dict] = None
+    keep_alive: Optional[Union[int, str]] = None
+
+
+@app.post("/api/embeddings")
+@app.post("/api/embeddings/{url_idx}")
+async def generate_embeddings(
+    form_data: GenerateEmbeddingsForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_current_user),
+):
+    if url_idx == None:
+        if form_data.model in app.state.MODELS:
+            url_idx = random.choice(app.state.MODELS[form_data.model]["urls"])
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+            )
+
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    try:
+        r = requests.request(
+            method="POST",
+            url=f"{url}/api/embeddings",
+            data=form_data.model_dump_json(exclude_none=True),
+        )
+        r.raise_for_status()
+
+        return r.json()
+    except Exception as e:
+        print(e)
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+class GenerateCompletionForm(BaseModel):
+    model: str
+    prompt: str
+    images: Optional[List[str]] = None
+    format: Optional[str] = None
+    options: Optional[dict] = None
+    system: Optional[str] = None
+    template: Optional[str] = None
+    context: Optional[str] = None
+    stream: Optional[bool] = True
+    raw: Optional[bool] = None
+    keep_alive: Optional[Union[int, str]] = None
+
+
+@app.post("/api/generate")
+@app.post("/api/generate/{url_idx}")
+async def generate_completion(
+    form_data: GenerateCompletionForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_current_user),
+):
+
+    if url_idx == None:
+        if form_data.model in app.state.MODELS:
+            url_idx = random.choice(app.state.MODELS[form_data.model]["urls"])
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail="error_detail",
+            )
+
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    r = None
+
+    def get_request():
+        nonlocal form_data
+        nonlocal r
+
+        request_id = str(uuid.uuid4())
+        try:
+            REQUEST_POOL.append(request_id)
+
+            def stream_content():
+                try:
+                    if form_data.stream:
+                        yield json.dumps({"id": request_id, "done": False}) + "\n"
+
+                    for chunk in r.iter_content(chunk_size=8192):
+                        if request_id in REQUEST_POOL:
+                            yield chunk
+                        else:
+                            print("User: canceled request")
+                            break
+                finally:
+                    if hasattr(r, "close"):
+                        r.close()
+                        if request_id in REQUEST_POOL:
+                            REQUEST_POOL.remove(request_id)
+
+            r = requests.request(
+                method="POST",
+                url=f"{url}/api/generate",
+                data=form_data.model_dump_json(exclude_none=True),
+                stream=True,
+            )
+
+            r.raise_for_status()
+
+            return StreamingResponse(
+                stream_content(),
+                status_code=r.status_code,
+                headers=dict(r.headers),
+            )
+        except Exception as e:
+            raise e
+
+    try:
+        return await run_in_threadpool(get_request)
+    except Exception as e:
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+class ChatMessage(BaseModel):
+    role: str
+    content: str
+    images: Optional[List[str]] = None
+
+
+class GenerateChatCompletionForm(BaseModel):
+    model: str
+    messages: List[ChatMessage]
+    format: Optional[str] = None
+    options: Optional[dict] = None
+    template: Optional[str] = None
+    stream: Optional[bool] = True
+    keep_alive: Optional[Union[int, str]] = None
+
+
+@app.post("/api/chat")
+@app.post("/api/chat/{url_idx}")
+async def generate_chat_completion(
+    form_data: GenerateChatCompletionForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_current_user),
+):
+
+    if url_idx == None:
+        if form_data.model in app.state.MODELS:
+            url_idx = random.choice(app.state.MODELS[form_data.model]["urls"])
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+            )
+
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    r = None
+
+    print(form_data.model_dump_json(exclude_none=True))
+
+    def get_request():
+        nonlocal form_data
+        nonlocal r
+
+        request_id = str(uuid.uuid4())
+        try:
+            REQUEST_POOL.append(request_id)
+
+            def stream_content():
+                try:
+                    if form_data.stream:
+                        yield json.dumps({"id": request_id, "done": False}) + "\n"
+
+                    for chunk in r.iter_content(chunk_size=8192):
+                        if request_id in REQUEST_POOL:
+                            yield chunk
+                        else:
+                            print("User: canceled request")
+                            break
+                finally:
+                    if hasattr(r, "close"):
+                        r.close()
+                        if request_id in REQUEST_POOL:
+                            REQUEST_POOL.remove(request_id)
+
+            r = requests.request(
+                method="POST",
+                url=f"{url}/api/chat",
+                data=form_data.model_dump_json(exclude_none=True),
+                stream=True,
+            )
+
+            r.raise_for_status()
+
+            return StreamingResponse(
+                stream_content(),
+                status_code=r.status_code,
+                headers=dict(r.headers),
+            )
+        except Exception as e:
+            raise e
+
+    try:
+        return await run_in_threadpool(get_request)
+    except Exception as e:
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
+# TODO: we should update this part once Ollama supports other types
+class OpenAIChatMessage(BaseModel):
+    role: str
+    content: str
+
+    model_config = ConfigDict(extra="allow")
+
+
+class OpenAIChatCompletionForm(BaseModel):
+    model: str
+    messages: List[OpenAIChatMessage]
+
+    model_config = ConfigDict(extra="allow")
+
+
+@app.post("/v1/chat/completions")
+@app.post("/v1/chat/completions/{url_idx}")
+async def generate_openai_chat_completion(
+    form_data: OpenAIChatCompletionForm,
+    url_idx: Optional[int] = None,
+    user=Depends(get_current_user),
+):
+
+    if url_idx == None:
+        if form_data.model in app.state.MODELS:
+            url_idx = random.choice(app.state.MODELS[form_data.model]["urls"])
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+            )
+
+    url = app.state.OLLAMA_BASE_URLS[url_idx]
+    print(url)
+
+    r = None
+
+    def get_request():
+        nonlocal form_data
+        nonlocal r
+
+        request_id = str(uuid.uuid4())
+        try:
+            REQUEST_POOL.append(request_id)
+
+            def stream_content():
+                try:
+                    if form_data.stream:
+                        yield json.dumps(
+                            {"request_id": request_id, "done": False}
+                        ) + "\n"
+
+                    for chunk in r.iter_content(chunk_size=8192):
+                        if request_id in REQUEST_POOL:
+                            yield chunk
+                        else:
+                            print("User: canceled request")
+                            break
+                finally:
+                    if hasattr(r, "close"):
+                        r.close()
+                        if request_id in REQUEST_POOL:
+                            REQUEST_POOL.remove(request_id)
+
+            r = requests.request(
+                method="POST",
+                url=f"{url}/v1/chat/completions",
+                data=form_data.model_dump_json(exclude_none=True),
+                stream=True,
+            )
+
+            r.raise_for_status()
+
+            return StreamingResponse(
+                stream_content(),
+                status_code=r.status_code,
+                headers=dict(r.headers),
+            )
+        except Exception as e:
+            raise e
+
+    try:
+        return await run_in_threadpool(get_request)
+    except Exception as e:
+        error_detail = "Open WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"Ollama: {res['error']}"
+            except:
+                error_detail = f"Ollama: {e}"
+
+        raise HTTPException(
+            status_code=r.status_code if r else 500,
+            detail=error_detail,
+        )
+
+
 @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def proxy(path: str, request: Request, user=Depends(get_current_user)):
-    target_url = f"{app.state.OLLAMA_BASE_URL}/{path}"
+async def deprecated_proxy(path: str, request: Request, user=Depends(get_current_user)):
+    url = app.state.OLLAMA_BASE_URLS[0]
+    target_url = f"{url}/{path}"

    body = await request.body()
    headers = dict(request.headers)

--- a/backend/apps/ollama/old_main.py
+++ b/backend/apps/ollama/old_main.py
-from fastapi import FastAPI, Request, Response, HTTPException, Depends
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse
-
-import requests
-import json
-from pydantic import BaseModel
-
-from apps.web.models.users import Users
-from constants import ERROR_MESSAGES
-from utils.utils import decode_token, get_current_user
-from config import OLLAMA_API_BASE_URL, WEBUI_AUTH
-
-import aiohttp
-
-app = FastAPI()
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-app.state.OLLAMA_API_BASE_URL = OLLAMA_API_BASE_URL
-
-# TARGET_SERVER_URL = OLLAMA_API_BASE_URL
-
-
-@app.get("/url")
-async def get_ollama_api_url(user=Depends(get_current_user)):
-    if user and user.role == "admin":
-        return {"OLLAMA_API_BASE_URL": app.state.OLLAMA_API_BASE_URL}
-    else:
-        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
-
-
-class UrlUpdateForm(BaseModel):
-    url: str
-
-
-@app.post("/url/update")
-async def update_ollama_api_url(
-    form_data: UrlUpdateForm, user=Depends(get_current_user)
-):
-    if user and user.role == "admin":
-        app.state.OLLAMA_API_BASE_URL = form_data.url
-        return {"OLLAMA_API_BASE_URL": app.state.OLLAMA_API_BASE_URL}
-    else:
-        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
-
-
-# async def fetch_sse(method, target_url, body, headers):
-#     async with aiohttp.ClientSession() as session:
-#         try:
-#             async with session.request(
-#                 method, target_url, data=body, headers=headers
-#             ) as response:
-#                 print(response.status)
-#                 async for line in response.content:
-#                     yield line
-#         except Exception as e:
-#             print(e)
-#             error_detail = "Open WebUI: Server Connection Error"
-#             yield json.dumps({"error": error_detail, "message": str(e)}).encode()
-
-
-@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def proxy(path: str, request: Request, user=Depends(get_current_user)):
-    target_url = f"{app.state.OLLAMA_API_BASE_URL}/{path}"
-    print(target_url)
-
-    body = await request.body()
-    headers = dict(request.headers)
-
-    if user.role in ["user", "admin"]:
-        if path in ["pull", "delete", "push", "copy", "create"]:
-            if user.role != "admin":
-                raise HTTPException(
-                    status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED
-                )
-    else:
-        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
-
-    headers.pop("Host", None)
-    headers.pop("Authorization", None)
-    headers.pop("Origin", None)
-    headers.pop("Referer", None)
-
-    session = aiohttp.ClientSession()
-    response = None
-    try:
-        response = await session.request(
-            request.method, target_url, data=body, headers=headers
-        )
-
-        print(response)
-        if not response.ok:
-            data = await response.json()
-            print(data)
-            response.raise_for_status()
-
-        async def generate():
-            async for line in response.content:
-                print(line)
-                yield line
-            await session.close()
-
-        return StreamingResponse(generate(), response.status)
-
-    except Exception as e:
-        print(e)
-        error_detail = "Open WebUI: Server Connection Error"
-
-        if response is not None:
-            try:
-                res = await response.json()
-                if "error" in res:
-                    error_detail = f"Ollama: {res['error']}"
-            except:
-                error_detail = f"Ollama: {e}"
-
-        await session.close()
-        raise HTTPException(
-            status_code=response.status if response else 500,
-            detail=error_detail,
-        )
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -108,7 +108,7 @@ class StoreWebForm(CollectionNameForm):
    url: str


-def store_data_in_vector_db(data, collection_name) -> bool:
+def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool:
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP
    )
@@ -118,6 +118,12 @@ def store_data_in_vector_db(data, collection_name) -> bool:
    metadatas = [doc.metadata for doc in docs]

    try:
+        if overwrite:
+            for collection in CHROMA_CLIENT.list_collections():
+                if collection_name == collection.name:
+                    print(f"deleting existing collection {collection_name}")
+                    CHROMA_CLIENT.delete_collection(name=collection_name)
+
        collection = CHROMA_CLIENT.create_collection(
            name=collection_name,
            embedding_function=app.state.sentence_transformer_ef,
@@ -355,7 +361,7 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)):
        if collection_name == "":
            collection_name = calculate_sha256_string(form_data.url)[:63]

-        store_data_in_vector_db(data, collection_name)
+        store_data_in_vector_db(data, collection_name, overwrite=True)
        return {
            "status": True,
            "collection_name": collection_name,

--- a/backend/constants.py
+++ b/backend/constants.py
@@ -48,3 +48,5 @@ class ERROR_MESSAGES(str, Enum):
        lambda err="": f"Invalid format. Please use the correct format{err if err else ''}"
    )
    RATE_LIMIT_EXCEEDED = "API rate limit exceeded"
+
+    MODEL_NOT_FOUND = lambda name="": f"Model '{name}' was not found"
--- a/backend/main.py
+++ b/backend/main.py
@@ -104,7 +104,7 @@ async def auth_middleware(request: Request, call_next):
 app.mount("/api/v1", webui_app)
 app.mount("/litellm/api", litellm_app)

-app.mount("/ollama/api", ollama_app)
+app.mount("/ollama", ollama_app)
 app.mount("/openai/api", openai_app)

 app.mount("/images/api/v1", images_app)
@@ -125,6 +125,14 @@ async def get_app_config():
    }


+@app.get("/api/version")
+async def get_app_config():
+
+    return {
+        "version": VERSION,
+    }
+
+
 @app.get("/api/changelog")
 async def get_app_changelog():
    return CHANGELOG

--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -22,6 +22,7 @@ google-generativeai

 langchain
 langchain-community
+fake_useragent
 chromadb
 sentence_transformers
 pypdf

--- a/src/lib/apis/ollama/index.ts
+++ b/src/lib/apis/ollama/index.ts
 import { OLLAMA_API_BASE_URL } from '$lib/constants';

-export const getOllamaAPIUrl = async (token: string = '') => {
+export const getOllamaUrls = async (token: string = '') => {
 	let error = null;

-	const res = await fetch(`${OLLAMA_API_BASE_URL}/url`, {
+	const res = await fetch(`${OLLAMA_API_BASE_URL}/urls`, {
 		method: 'GET',
 		headers: {
 			Accept: 'application/json',
@@ -29,13 +29,13 @@ export const getOllamaAPIUrl = async (token: string = '') => {
 		throw error;
 	}

-	return res.OLLAMA_BASE_URL;
+	return res.OLLAMA_BASE_URLS;
 };

-export const updateOllamaAPIUrl = async (token: string = '', url: string) => {
+export const updateOllamaUrls = async (token: string = '', urls: string[]) => {
 	let error = null;

-	const res = await fetch(`${OLLAMA_API_BASE_URL}/url/update`, {
+	const res = await fetch(`${OLLAMA_API_BASE_URL}/urls/update`, {
 		method: 'POST',
 		headers: {
 			Accept: 'application/json',
@@ -43,7 +43,7 @@ export const updateOllamaAPIUrl = async (token: string = '', url: string) => {
 			...(token && { authorization: `Bearer ${token}` })
 		},
 		body: JSON.stringify({
-			url: url
+			urls: urls
 		})
 	})
 		.then(async (res) => {
@@ -64,7 +64,7 @@ export const updateOllamaAPIUrl = async (token: string = '', url: string) => {
 		throw error;
 	}

-	return res.OLLAMA_BASE_URL;
+	return res.OLLAMA_BASE_URLS;
 };

 export const getOllamaVersion = async (token: string = '') => {
@@ -151,7 +151,8 @@ export const generateTitle = async (
 	const res = await fetch(`${OLLAMA_API_BASE_URL}/api/generate`, {
 		method: 'POST',
 		headers: {
-			'Content-Type': 'text/event-stream',
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
 			Authorization: `Bearer ${token}`
 		},
 		body: JSON.stringify({
@@ -189,7 +190,8 @@ export const generatePrompt = async (token: string = '', model: string, conversa
 	const res = await fetch(`${OLLAMA_API_BASE_URL}/api/generate`, {
 		method: 'POST',
 		headers: {
-			'Content-Type': 'text/event-stream',
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
 			Authorization: `Bearer ${token}`
 		},
 		body: JSON.stringify({
@@ -223,7 +225,8 @@ export const generateTextCompletion = async (token: string = '', model: string,
 	const res = await fetch(`${OLLAMA_API_BASE_URL}/api/generate`, {
 		method: 'POST',
 		headers: {
-			'Content-Type': 'text/event-stream',
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
 			Authorization: `Bearer ${token}`
 		},
 		body: JSON.stringify({
@@ -251,7 +254,8 @@ export const generateChatCompletion = async (token: string = '', body: object) =
 		signal: controller.signal,
 		method: 'POST',
 		headers: {
-			'Content-Type': 'text/event-stream',
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
 			Authorization: `Bearer ${token}`
 		},
 		body: JSON.stringify(body)
@@ -294,7 +298,8 @@ export const createModel = async (token: string, tagName: string, content: strin
 	const res = await fetch(`${OLLAMA_API_BASE_URL}/api/create`, {
 		method: 'POST',
 		headers: {
-			'Content-Type': 'text/event-stream',
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
 			Authorization: `Bearer ${token}`
 		},
 		body: JSON.stringify({
@@ -313,19 +318,23 @@ export const createModel = async (token: string, tagName: string, content: strin
 	return res;
 };

-export const deleteModel = async (token: string, tagName: string) => {
+export const deleteModel = async (token: string, tagName: string, urlIdx: string | null = null) => {
 	let error = null;

-	const res = await fetch(`${OLLAMA_API_BASE_URL}/api/delete`, {
+	const res = await fetch(
+		`${OLLAMA_API_BASE_URL}/api/delete${urlIdx !== null ? `/${urlIdx}` : ''}`,
+		{
 			method: 'DELETE',
 			headers: {
-			'Content-Type': 'text/event-stream',
+				Accept: 'application/json',
+				'Content-Type': 'application/json',
 				Authorization: `Bearer ${token}`
 			},
 			body: JSON.stringify({
 				name: tagName
 			})
-	})
+		}
+	)
 		.then(async (res) => {
 			if (!res.ok) throw await res.json();
 			return res.json();
@@ -336,7 +345,12 @@ export const deleteModel = async (token: string, tagName: string) => {
 		})
 		.catch((err) => {
 			console.log(err);
-			error = err.error;
+			error = err;
+
+			if ('detail' in err) {
+				error = err.detail;
+			}
+
 			return null;
 		});

@@ -347,13 +361,14 @@ export const deleteModel = async (token: string, tagName: string) => {
 	return res;
 };

-export const pullModel = async (token: string, tagName: string) => {
+export const pullModel = async (token: string, tagName: string, urlIdx: string | null = null) => {
 	let error = null;

-	const res = await fetch(`${OLLAMA_API_BASE_URL}/api/pull`, {
+	const res = await fetch(`${OLLAMA_API_BASE_URL}/api/pull${urlIdx !== null ? `/${urlIdx}` : ''}`, {
 		method: 'POST',
 		headers: {
-			'Content-Type': 'text/event-stream',
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
 			Authorization: `Bearer ${token}`
 		},
 		body: JSON.stringify({

--- a/src/lib/components/chat/MessageInput.svelte
+++ b/src/lib/components/chat/MessageInput.svelte
@@ -21,7 +21,7 @@

 	export let suggestionPrompts = [];
 	export let autoScroll = true;
-
+	let chatTextAreaElement:HTMLTextAreaElement
 	let filesInputElement;

 	let promptsElement;
@@ -45,11 +45,9 @@
 	let speechRecognition;

 	$: if (prompt) {
-		const chatInput = document.getElementById('chat-textarea');
-
-		if (chatInput) {
-			chatInput.style.height = '';
-			chatInput.style.height = Math.min(chatInput.scrollHeight, 200) + 'px';
+		if (chatTextAreaElement) {
+			chatTextAreaElement.style.height = '';
+			chatTextAreaElement.style.height = Math.min(chatTextAreaElement.scrollHeight, 200) + 'px';
 		}
 	}

@@ -88,9 +86,7 @@
 			if (res) {
 				prompt = res.text;
 				await tick();
-
-				const inputElement = document.getElementById('chat-textarea');
-				inputElement?.focus();
+				chatTextAreaElement?.focus();

 				if (prompt !== '' && $settings?.speechAutoSend === true) {
 					submitPrompt(prompt, user);
@@ -193,8 +189,7 @@
 						prompt = `${prompt}${transcript}`;

 						await tick();
-						const inputElement = document.getElementById('chat-textarea');
-						inputElement?.focus();
+						chatTextAreaElement?.focus();

 						// Restart the inactivity timeout
 						timeoutId = setTimeout(() => {
@@ -296,8 +291,7 @@
 	};

 	onMount(() => {
-		const chatInput = document.getElementById('chat-textarea');
-		window.setTimeout(() => chatInput?.focus(), 0);
+		window.setTimeout(() => chatTextAreaElement?.focus(), 0);

 		const dropZone = document.querySelector('body');

@@ -671,6 +665,7 @@

 						<textarea
 							id="chat-textarea"
+							bind:this={chatTextAreaElement}
 							class=" dark:bg-gray-900 dark:text-gray-100 outline-none w-full py-3 px-3 {fileUploadEnabled
 								? ''
 								: ' pl-4'} rounded-xl resize-none h-[48px]"

--- a/src/lib/components/chat/Messages/ResponseMessage.svelte
+++ b/src/lib/components/chat/Messages/ResponseMessage.svelte
@@ -42,7 +42,7 @@

 	let edit = false;
 	let editedContent = '';
-
+	let editTextAreaElement: HTMLTextAreaElement;
 	let tooltipInstance = null;

 	let sentencesAudio = {};
@@ -249,10 +249,9 @@
 		editedContent = message.content;

 		await tick();
-		const editElement = document.getElementById(`message-edit-${message.id}`);

-		editElement.style.height = '';
-		editElement.style.height = `${editElement.scrollHeight}px`;
+		editTextAreaElement.style.height = '';
+		editTextAreaElement.style.height = `${editTextAreaElement.scrollHeight}px`;
 	};

 	const editMessageConfirmHandler = async () => {
@@ -343,6 +342,7 @@
 							<div class=" w-full">
 								<textarea
 									id="message-edit-{message.id}"
+									bind:this={editTextAreaElement}
 									class=" bg-transparent outline-none w-full resize-none"
 									bind:value={editedContent}
 									on:input={(e) => {

--- a/src/lib/components/chat/Messages/UserMessage.svelte
+++ b/src/lib/components/chat/Messages/UserMessage.svelte
@@ -22,18 +22,17 @@

 	let edit = false;
 	let editedContent = '';
-
+	let messageEditTextAreaElement: HTMLTextAreaElement;
 	const editMessageHandler = async () => {
 		edit = true;
 		editedContent = message.content;

 		await tick();
-		const editElement = document.getElementById(`message-edit-${message.id}`);

-		editElement.style.height = '';
-		editElement.style.height = `${editElement.scrollHeight}px`;
+		messageEditTextAreaElement.style.height = '';
+		messageEditTextAreaElement.style.height = `${messageEditTextAreaElement.scrollHeight}px`;

-		editElement?.focus();
+		messageEditTextAreaElement?.focus();
 	};

 	const editMessageConfirmHandler = async () => {
@@ -168,10 +167,11 @@
 				<div class=" w-full">
 					<textarea
 						id="message-edit-{message.id}"
+						bind:this={messageEditTextAreaElement}
 						class=" bg-transparent outline-none w-full resize-none"
 						bind:value={editedContent}
 						on:input={(e) => {
-							e.target.style.height = `${e.target.scrollHeight}px`;
+							messageEditTextAreaElement.style.height = `${messageEditTextAreaElement.scrollHeight}px`;
 						}}
 					/>


--- a/src/lib/components/chat/Settings/Account.svelte
+++ b/src/lib/components/chat/Settings/Account.svelte
@@ -17,6 +17,7 @@
 	let name = '';
 	let showJWTToken = false;
 	let JWTTokenCopied = false;
+	let profileImageInputElement: HTMLInputElement;

 	const submitHandler = async () => {
 		const updatedUser = await updateUserProfile(localStorage.token, name, profileImageUrl).catch(
@@ -42,11 +43,12 @@
 	<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-80">
 		<input
 			id="profile-image-input"
+			bind:this={profileImageInputElement}
 			type="file"
 			hidden
 			accept="image/*"
 			on:change={(e) => {
-				const files = e?.target?.files ?? [];
+				const files = profileImageInputElement.files ?? [];
 				let reader = new FileReader();
 				reader.onload = (event) => {
 					let originalImageUrl = `${event.target.result}`;
@@ -88,7 +90,7 @@
 						// Display the compressed image
 						profileImageUrl = compressedSrc;

-						e.target.files = null;
+						profileImageInputElement.files = null;
 					};
 				};

@@ -109,9 +111,7 @@
 					<button
 						class="relative rounded-full dark:bg-gray-700"
 						type="button"
-						on:click={() => {
-							document.getElementById('profile-image-input')?.click();
-						}}
+						on:click={profileImageInputElement.click}
 					>
 						<img
 							src={profileImageUrl !== '' ? profileImageUrl : '/user.png'}

--- a/src/lib/components/chat/Settings/Chats.svelte
+++ b/src/lib/components/chat/Settings/Chats.svelte
@@ -24,6 +24,7 @@
 	let saveChatHistory = true;
 	let importFiles;
 	let showDeleteConfirm = false;
+	let chatImportInputElement: HTMLInputElement;

 	$: if (importFiles) {
 		console.log(importFiles);
@@ -161,12 +162,17 @@
 		<hr class=" dark:border-gray-700" />

 		<div class="flex flex-col">
-			<input id="chat-import-input" bind:files={importFiles} type="file" accept=".json" hidden />
+			<input
+				id="chat-import-input"
+				bind:this={chatImportInputElement}
+				bind:files={importFiles}
+				type="file"
+				accept=".json"
+				hidden
+			/>
 			<button
 				class=" flex rounded-md py-2 px-3.5 w-full hover:bg-gray-200 dark:hover:bg-gray-800 transition"
-				on:click={() => {
-					document.getElementById('chat-import-input').click();
-				}}
+				on:click={chatImportInputElement.click}
 			>
 				<div class=" self-center mr-3">
 					<svg

--- a/src/lib/components/chat/Settings/Connections.svelte
+++ b/src/lib/components/chat/Settings/Connections.svelte
@@ -3,7 +3,7 @@
 	import { createEventDispatcher, onMount, getContext } from 'svelte';
 	const dispatch = createEventDispatcher();

-	import { getOllamaAPIUrl, getOllamaVersion, updateOllamaAPIUrl } from '$lib/apis/ollama';
+	import { getOllamaUrls, getOllamaVersion, updateOllamaUrls } from '$lib/apis/ollama';
 	import { getOpenAIKey, getOpenAIUrl, updateOpenAIKey, updateOpenAIUrl } from '$lib/apis/openai';
 	import { toast } from 'svelte-sonner';

@@ -12,7 +12,8 @@
 	export let getModels: Function;

 	// External
-	let API_BASE_URL = '';
+	let OLLAMA_BASE_URL = '';
+	let OLLAMA_BASE_URLS = [''];

 	let OPENAI_API_KEY = '';
 	let OPENAI_API_BASE_URL = '';
@@ -27,8 +28,8 @@
 		await models.set(await getModels());
 	};

-	const updateOllamaAPIUrlHandler = async () => {
-		API_BASE_URL = await updateOllamaAPIUrl(localStorage.token, API_BASE_URL);
+	const updateOllamaUrlsHandler = async () => {
+		OLLAMA_BASE_URLS = await updateOllamaUrls(localStorage.token, OLLAMA_BASE_URLS);

 		const ollamaVersion = await getOllamaVersion(localStorage.token).catch((error) => {
 			toast.error(error);
@@ -43,7 +44,7 @@

 	onMount(async () => {
 		if ($user.role === 'admin') {
-			API_BASE_URL = await getOllamaAPIUrl(localStorage.token);
+			OLLAMA_BASE_URLS = await getOllamaUrls(localStorage.token);
 			OPENAI_API_BASE_URL = await getOpenAIUrl(localStorage.token);
 			OPENAI_API_KEY = await getOpenAIKey(localStorage.token);
 		}
@@ -55,11 +56,6 @@
 	on:submit|preventDefault={() => {
 		updateOpenAIHandler();
 		dispatch('save');
-
-		// saveSettings({
-		// 	OPENAI_API_KEY: OPENAI_API_KEY !== '' ? OPENAI_API_KEY : undefined,
-		// 	OPENAI_API_BASE_URL: OPENAI_API_BASE_URL !== '' ? OPENAI_API_BASE_URL : undefined
-		// });
 	}}
 >
 	<div class="  pr-1.5 overflow-y-scroll max-h-[20.5rem] space-y-3">
@@ -116,19 +112,65 @@
 		<hr class=" dark:border-gray-700" />

 		<div>
-			<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Ollama API URL')}</div>
-			<div class="flex w-full">
-				<div class="flex-1 mr-2">
+			<div class=" mb-2.5 text-sm font-medium">Ollama Base URL</div>
+			<div class="flex w-full gap-1.5">
+				<div class="flex-1 flex flex-col gap-2">
+					{#each OLLAMA_BASE_URLS as url, idx}
+						<div class="flex gap-1.5">
 							<input
-						class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none"
+								class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 								placeholder="Enter URL (e.g. http://localhost:11434)"
-						bind:value={API_BASE_URL}
+								bind:value={url}
+							/>
+
+							<div class="self-center flex items-center">
+								{#if idx === 0}
+									<button
+										class="px-1"
+										on:click={() => {
+											OLLAMA_BASE_URLS = [...OLLAMA_BASE_URLS, ''];
+										}}
+										type="button"
+									>
+										<svg
+											xmlns="http://www.w3.org/2000/svg"
+											viewBox="0 0 16 16"
+											fill="currentColor"
+											class="w-4 h-4"
+										>
+											<path
+												d="M8.75 3.75a.75.75 0 0 0-1.5 0v3.5h-3.5a.75.75 0 0 0 0 1.5h3.5v3.5a.75.75 0 0 0 1.5 0v-3.5h3.5a.75.75 0 0 0 0-1.5h-3.5v-3.5Z"
 											/>
+										</svg>
+									</button>
+								{:else}
+									<button
+										class="px-1"
+										on:click={() => {
+											OLLAMA_BASE_URLS = OLLAMA_BASE_URLS.filter((url, urlIdx) => idx !== urlIdx);
+										}}
+										type="button"
+									>
+										<svg
+											xmlns="http://www.w3.org/2000/svg"
+											viewBox="0 0 16 16"
+											fill="currentColor"
+											class="w-4 h-4"
+										>
+											<path d="M3.75 7.25a.75.75 0 0 0 0 1.5h8.5a.75.75 0 0 0 0-1.5h-8.5Z" />
+										</svg>
+									</button>
+								{/if}
 							</div>
+						</div>
+					{/each}
+				</div>
+
+				<div class="">
 					<button
-					class="px-3 bg-gray-200 hover:bg-gray-300 dark:bg-gray-600 dark:hover:bg-gray-700 rounded transition"
+						class="p-2.5 bg-gray-200 hover:bg-gray-300 dark:bg-gray-850 dark:hover:bg-gray-800 rounded-lg transition"
 						on:click={() => {
-						updateOllamaAPIUrlHandler();
+							updateOllamaUrlsHandler();
 						}}
 						type="button"
 					>
@@ -146,6 +188,7 @@
 						</svg>
 					</button>
 				</div>
+			</div>

 			<div class="mt-2 text-xs text-gray-400 dark:text-gray-500">
 				{$i18n.t('Trouble accessing Ollama?')}

--- a/src/lib/components/chat/Settings/Models.svelte
+++ b/src/lib/components/chat/Settings/Models.svelte
@@ -2,7 +2,13 @@
 	import queue from 'async/queue';
 	import { toast } from 'svelte-sonner';

-	import { createModel, deleteModel, getOllamaVersion, pullModel } from '$lib/apis/ollama';
+	import {
+		createModel,
+		deleteModel,
+		getOllamaUrls,
+		getOllamaVersion,
+		pullModel
+	} from '$lib/apis/ollama';
 	import { WEBUI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
 	import { WEBUI_NAME, models, user } from '$lib/stores';
 	import { splitStream } from '$lib/utils';
@@ -15,7 +21,7 @@

 	let showLiteLLM = false;
 	let showLiteLLMParams = false;
-
+	let modelUploadInputElement: HTMLInputElement;
 	let liteLLMModelInfo = [];

 	let liteLLMModel = '';
@@ -29,6 +35,9 @@
 	$: liteLLMModelName = liteLLMModel;

 	// Models
+
+	let OLLAMA_URLS = [];
+	let selectedOllamaUrlIdx: string | null = null;
 	let showExperimentalOllama = false;
 	let ollamaVersion = '';
 	const MAX_PARALLEL_DOWNLOADS = 3;
@@ -246,9 +255,11 @@
 	};

 	const deleteModelHandler = async () => {
-		const res = await deleteModel(localStorage.token, deleteModelTag).catch((error) => {
+		const res = await deleteModel(localStorage.token, deleteModelTag, selectedOllamaUrlIdx).catch(
+			(error) => {
 				toast.error(error);
-		});
+			}
+		);

 		if (res) {
 			toast.success($i18n.t(`Deleted {{deleteModelTag}}`, { deleteModelTag }));
@@ -259,10 +270,12 @@
 	};

 	const pullModelHandlerProcessor = async (opts: { modelName: string; callback: Function }) => {
-		const res = await pullModel(localStorage.token, opts.modelName).catch((error) => {
+		const res = await pullModel(localStorage.token, opts.modelName, selectedOllamaUrlIdx).catch(
+			(error) => {
 				opts.callback({ success: false, error, modelName: opts.modelName });
 				return null;
-		});
+			}
+		);

 		if (res) {
 			const reader = res.body
@@ -368,6 +381,15 @@
 	};

 	onMount(async () => {
+		OLLAMA_URLS = await getOllamaUrls(localStorage.token).catch((error) => {
+			toast.error(error);
+			return [];
+		});
+
+		if (OLLAMA_URLS.length > 1) {
+			selectedOllamaUrlIdx = 0;
+		}
+
 		ollamaVersion = await getOllamaVersion(localStorage.token).catch((error) => false);
 		liteLLMModelInfo = await getLiteLLMModelInfo(localStorage.token);
 	});
@@ -377,20 +399,35 @@
 	<div class=" space-y-3 pr-1.5 overflow-y-scroll h-[23rem]">
 		{#if ollamaVersion}
 			<div class="space-y-2 pr-1.5">
-				<div>
-					<div class=" mb-2 text-sm font-medium">{$i18n.t('Manage Ollama Models')}</div>
+				<div class="text-sm font-medium">{$i18n.t('Manage Ollama Models')}</div>

+				{#if OLLAMA_URLS.length > 1}
+					<div class="flex-1 pb-1">
+						<select
+							class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+							bind:value={selectedOllamaUrlIdx}
+							placeholder="Select an Ollama instance"
+						>
+							{#each OLLAMA_URLS as url, idx}
+								<option value={idx} class="bg-gray-100 dark:bg-gray-700">{url}</option>
+							{/each}
+						</select>
+					</div>
+				{/if}
+
+				<div class="space-y-2">
+					<div>
 						<div class=" mb-2 text-sm font-medium">{$i18n.t('Pull a model from Ollama.com')}</div>
 						<div class="flex w-full">
 							<div class="flex-1 mr-2">
 								<input
-								class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+									class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 									placeholder="Enter model tag (e.g. mistral:7b)"
 									bind:value={modelTag}
 								/>
 							</div>
 							<button
-							class="px-3 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded transition"
+								class="px-2.5 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
 								on:click={() => {
 									pullModelHandler();
 								}}
@@ -441,11 +478,10 @@
 						</div>

 						<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
-						{$i18n.t('To access the available model names for downloading,')}
-						<a
+							To access the available model names for downloading, <a
 								class=" text-gray-500 dark:text-gray-300 font-medium underline"
 								href="https://ollama.com/library"
-							target="_blank">{$i18n.t('click here.')}</a
+								target="_blank">click here.</a
 							>
 						</div>

@@ -470,16 +506,16 @@
 					</div>

 					<div>
-					<div class=" mb-2 text-sm font-medium">{$i18n.t('Delete a model')}</div>
+						<div class=" mb-2 text-sm font-medium">Delete a model</div>
 						<div class="flex w-full">
 							<div class="flex-1 mr-2">
 								<select
-								class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+									class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 									bind:value={deleteModelTag}
-								placeholder={$i18n.t('Select a model')}
+									placeholder="Select a model"
 								>
 									{#if !deleteModelTag}
-									<option value="" disabled selected>{$i18n.t('Select a model')}</option>
+										<option value="" disabled selected>Select a model</option>
 									{/if}
 									{#each $models.filter((m) => m.size != null) as model}
 										<option value={model.name} class="bg-gray-100 dark:bg-gray-700"
@@ -489,7 +525,7 @@
 								</select>
 							</div>
 							<button
-							class="px-3 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded transition"
+								class="px-2.5 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
 								on:click={() => {
 									deleteModelHandler();
 								}}
@@ -510,15 +546,15 @@
 						</div>
 					</div>

-				<div>
+					<div class="pt-1">
 						<div class="flex justify-between items-center text-xs">
-						<div class=" text-sm font-medium">{$i18n.t('Experimental')}</div>
+							<div class=" text-sm font-medium">Experimental</div>
 							<button
 								class=" text-xs font-medium text-gray-500"
 								type="button"
 								on:click={() => {
 									showExperimentalOllama = !showExperimentalOllama;
-							}}>{showExperimentalOllama ? $i18n.t('Show') : $i18n.t('Hide')}</button
+								}}>{showExperimentalOllama ? 'Hide' : 'Show'}</button
 							>
 						</div>
 					</div>
@@ -530,7 +566,7 @@
 							}}
 						>
 							<div class=" mb-2 flex w-full justify-between">
-							<div class="  text-sm font-medium">{$i18n.t('Upload a GGUF model')}</div>
+								<div class="  text-sm font-medium">Upload a GGUF model</div>

 								<button
 									class="p-1 px-3 text-xs flex rounded transition"
@@ -544,9 +580,9 @@
 									type="button"
 								>
 									{#if modelUploadMode === 'file'}
-									<span class="ml-2 self-center">{$i18n.t('File Mode')}</span>
+										<span class="ml-2 self-center">File Mode</span>
 									{:else}
-									<span class="ml-2 self-center">{$i18n.t('URL Mode')}</span>
+										<span class="ml-2 self-center">URL Mode</span>
 									{/if}
 								</button>
 							</div>
@@ -557,6 +593,7 @@
 										<div class="flex-1 {modelInputFile && modelInputFile.length > 0 ? 'mr-2' : ''}">
 											<input
 												id="model-upload-input"
+												bind:this={modelUploadInputElement}
 												type="file"
 												bind:files={modelInputFile}
 												on:change={() => {
@@ -569,10 +606,8 @@

 											<button
 												type="button"
-											class="w-full rounded text-left py-2 px-4 dark:text-gray-300 dark:bg-gray-850"
-											on:click={() => {
-												document.getElementById('model-upload-input').click();
-											}}
+												class="w-full rounded-lg text-left py-2 px-4 dark:text-gray-300 dark:bg-gray-850"
+												on:click={modelUploadInputElement.click}
 											>
 												{#if modelInputFile && modelInputFile.length > 0}
 													{modelInputFile[0].name}
@@ -584,7 +619,7 @@
 									{:else}
 										<div class="flex-1 {modelFileUrl !== '' ? 'mr-2' : ''}">
 											<input
-											class="w-full rounded text-left py-2 px-4 dark:text-gray-300 dark:bg-gray-850 outline-none {modelFileUrl !==
+												class="w-full rounded-lg text-left py-2 px-4 dark:text-gray-300 dark:bg-gray-850 outline-none {modelFileUrl !==
 												''
 													? 'mr-2'
 													: ''}"
@@ -651,7 +686,7 @@
 							{#if (modelUploadMode === 'file' && modelInputFile && modelInputFile.length > 0) || (modelUploadMode === 'url' && modelFileUrl !== '')}
 								<div>
 									<div>
-									<div class=" my-2.5 text-sm font-medium">{$i18n.t('Modelfile Content')}</div>
+										<div class=" my-2.5 text-sm font-medium">Modelfile Content</div>
 										<textarea
 											bind:value={modelFileContent}
 											class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none resize-none"
@@ -664,13 +699,13 @@
 								To access the GGUF models available for downloading, <a
 									class=" text-gray-500 dark:text-gray-300 font-medium underline"
 									href="https://huggingface.co/models?search=gguf"
-								target="_blank">{$i18n.t('click here.')}</a
+									target="_blank">click here.</a
 								>
 							</div>

 							{#if uploadProgress !== null}
 								<div class="mt-2">
-								<div class=" mb-2 text-xs">{$i18n.t('Upload Progress')}</div>
+									<div class=" mb-2 text-xs">Upload Progress</div>

 									<div class="w-full rounded-full dark:bg-gray-800">
 										<div
@@ -688,6 +723,7 @@
 						</form>
 					{/if}
 				</div>
+			</div>
 			<hr class=" dark:border-gray-700 my-2" />
 		{/if}

@@ -704,7 +740,7 @@
 								type="button"
 								on:click={() => {
 									showLiteLLMParams = !showLiteLLMParams;
-								}}>{showLiteLLMParams ? $i18n.t('Advanced') : $i18n.t('Default')}</button
+								}}>{showLiteLLMParams ? 'Hide Additional Params' : 'Show Additional Params'}</button
 							>
 						</div>
 					</div>
@@ -713,7 +749,7 @@
 						<div class="flex w-full mb-1.5">
 							<div class="flex-1 mr-2">
 								<input
-									class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+									class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 									placeholder="Enter LiteLLM Model (litellm_params.model)"
 									bind:value={liteLLMModel}
 									autocomplete="off"
@@ -721,7 +757,7 @@
 							</div>

 							<button
-								class="px-3 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded transition"
+								class="px-2.5 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
 								on:click={() => {
 									addLiteLLMModelHandler();
 								}}
@@ -745,7 +781,7 @@
 								<div class="flex w-full">
 									<div class="flex-1">
 										<input
-											class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 											placeholder="Enter Model Name (model_name)"
 											bind:value={liteLLMModelName}
 											autocomplete="off"
@@ -759,7 +795,7 @@
 								<div class="flex w-full">
 									<div class="flex-1">
 										<input
-											class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 											placeholder="Enter LiteLLM API Base URL (litellm_params.api_base)"
 											bind:value={liteLLMAPIBase}
 											autocomplete="off"
@@ -773,7 +809,7 @@
 								<div class="flex w-full">
 									<div class="flex-1">
 										<input
-											class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 											placeholder="Enter LiteLLM API Key (litellm_params.api_key)"
 											bind:value={liteLLMAPIKey}
 											autocomplete="off"
@@ -787,7 +823,7 @@
 								<div class="flex w-full">
 									<div class="flex-1">
 										<input
-											class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 											placeholder="Enter LiteLLM API RPM (litellm_params.rpm)"
 											bind:value={liteLLMRPM}
 											autocomplete="off"
@@ -814,7 +850,7 @@
 						<div class="flex w-full">
 							<div class="flex-1 mr-2">
 								<select
-									class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+									class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
 									bind:value={deleteLiteLLMModelId}
 									placeholder={$i18n.t('Select a model')}
 								>
@@ -829,7 +865,7 @@
 								</select>
 							</div>
 							<button
-								class="px-3 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded transition"
+								class="px-2.5 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
 								on:click={() => {
 									deleteLiteLLMModelHandler();
 								}}

--- a/src/lib/components/common/Tooltip.svelte
+++ b/src/lib/components/common/Tooltip.svelte
@@ -29,6 +29,6 @@
 	});
 </script>

-<div bind:this={tooltipElement}>
+<div bind:this={tooltipElement} aria-label={content}>
 	<slot />
 </div>
--- a/src/lib/components/documents/AddDocModal.svelte
+++ b/src/lib/components/documents/AddDocModal.svelte
@@ -17,7 +17,7 @@

 	export let show = false;
 	export let selectedDoc;
-
+	let uploadDocInputElement: HTMLInputElement;
 	let inputFiles;
 	let tags = [];

@@ -71,7 +71,7 @@
 			}

 			inputFiles = null;
-			document.getElementById('upload-doc-input').value = '';
+			uploadDocInputElement.value = '';
 		} else {
 			toast.error($i18n.t(`File not found.`));
 		}
@@ -128,14 +128,19 @@
 					}}
 				>
 					<div class="mb-3 w-full">
-						<input id="upload-doc-input" hidden bind:files={inputFiles} type="file" multiple />
+						<input
+							id="upload-doc-input"
+							bind:this={uploadDocInputElement}
+							hidden
+							bind:files={inputFiles}
+							type="file"
+							multiple
+						/>

 						<button
 							class="w-full text-sm font-medium py-3 bg-gray-850 hover:bg-gray-800 text-center rounded-xl"
 							type="button"
-							on:click={() => {
-								document.getElementById('upload-doc-input')?.click();
-							}}
+							on:click={uploadDocInputElement.click}
 						>
 							{#if inputFiles}
 								{inputFiles.length > 0 ? `${inputFiles.length}` : ''} document(s) selected.

--- a/src/lib/components/playground/ChatCompletion.svelte
+++ b/src/lib/components/playground/ChatCompletion.svelte
@@ -4,12 +4,11 @@
 	const i18n = getContext('i18n');

 	export let messages = [];
-
+	let textAreaElement: HTMLTextAreaElement;
 	onMount(() => {
 		messages.forEach((message, idx) => {
-			let textareaElement = document.getElementById(`${message.role}-${idx}-textarea`);
-			textareaElement.style.height = '';
-			textareaElement.style.height = textareaElement.scrollHeight + 'px';
+			textAreaElement.style.height = '';
+			textAreaElement.style.height = textAreaElement.scrollHeight + 'px';
 		});
 	});
 </script>
@@ -29,18 +28,19 @@
 			<div class="flex-1">
 				<textarea
 					id="{message.role}-{idx}-textarea"
+					bind:this={textAreaElement}
 					class="w-full bg-transparent outline-none rounded-lg p-2 text-sm resize-none overflow-hidden"
 					placeholder={$i18n.t(
 						`Enter ${message.role === 'user' ? 'a user' : 'an assistant'} message here`
 					)}
 					rows="1"
 					on:input={(e) => {
-						e.target.style.height = '';
-						e.target.style.height = e.target.scrollHeight + 'px';
+						textAreaElement.style.height = '';
+						textAreaElement.style.height = textAreaElement.scrollHeight + 'px';
 					}}
 					on:focus={(e) => {
-						e.target.style.height = '';
-						e.target.style.height = e.target.scrollHeight + 'px';
+						textAreaElement.style.height = '';
+						textAreaElement.style.height = textAreaElement.scrollHeight + 'px';

 						// e.target.style.height = Math.min(e.target.scrollHeight, 200) + 'px';
 					}}

--- a/src/lib/constants.ts
+++ b/src/lib/constants.ts
@@ -7,7 +7,7 @@ export const WEBUI_BASE_URL = dev ? `http://${location.hostname}:8080` : ``;
 export const WEBUI_API_BASE_URL = `${WEBUI_BASE_URL}/api/v1`;

 export const LITELLM_API_BASE_URL = `${WEBUI_BASE_URL}/litellm/api`;
-export const OLLAMA_API_BASE_URL = `${WEBUI_BASE_URL}/ollama/api`;
+export const OLLAMA_API_BASE_URL = `${WEBUI_BASE_URL}/ollama`;
 export const OPENAI_API_BASE_URL = `${WEBUI_BASE_URL}/openai/api`;
 export const AUDIO_API_BASE_URL = `${WEBUI_BASE_URL}/audio/api/v1`;
 export const IMAGES_API_BASE_URL = `${WEBUI_BASE_URL}/images/api/v1`;

--- a/src/routes/(app)/+layout.svelte
+++ b/src/routes/(app)/+layout.svelte
@@ -34,12 +34,13 @@
 	import Sidebar from '$lib/components/layout/Sidebar.svelte';
 	import ShortcutsModal from '$lib/components/chat/ShortcutsModal.svelte';
 	import ChangelogModal from '$lib/components/ChangelogModal.svelte';
+	import Tooltip from '$lib/components/common/Tooltip.svelte';

 	const i18n = getContext('i18n');

 	let ollamaVersion = '';
 	let loaded = false;
-
+	let showShortcutsButtonElement: HTMLButtonElement;
 	let DB = null;
 	let localDBChats = [];

@@ -186,7 +187,7 @@
 				if (isCtrlPressed && event.key === '/') {
 					event.preventDefault();
 					console.log('showShortcuts');
-					document.getElementById('show-shortcuts-button')?.click();
+					showShortcutsButtonElement.click();
 				}
 			});

@@ -203,8 +204,10 @@

 {#if loaded}
 	<div class=" hidden lg:flex fixed bottom-0 right-0 px-3 py-3 z-10">
+		<Tooltip content="help" placement="left">
 			<button
 				id="show-shortcuts-button"
+				bind:this={showShortcutsButtonElement}
 				class="text-gray-600 dark:text-gray-300 bg-gray-300/20 w-6 h-6 flex items-center justify-center text-xs rounded-full"
 				on:click={() => {
 					showShortcuts = !showShortcuts;
@@ -212,6 +215,7 @@
 			>
 				?
 			</button>
+		</Tooltip>
 	</div>

 	<ShortcutsModal bind:show={showShortcuts} />