"test/vscode:/vscode.git/clone" did not exist on "4e0ad45a39073b751595d6e82c2ea53b0229386e"
Unverified commit c6b9d4cd, authored by Timothy Jaeryang Baek, committed by GitHub

Merge pull request #273 from ollama-webui/gguf-import

feat: gguf import
parents 6ea9f6e1 a0181d95
@@ -13,7 +13,7 @@
ChatGPT-Style Web Interface for Ollama 🦙

-**Disclaimer:** *ollama-webui is a community-driven project and is not affiliated with the Ollama team in any way. This initiative is independent, and any inquiries or feedback should be directed to [our community on Discord](https://discord.gg/5rJgQTnV4s). We kindly request users to refrain from contacting or harassing the Ollama team regarding this project.*
+**Disclaimer:** _ollama-webui is a community-driven project and is not affiliated with the Ollama team in any way. This initiative is independent, and any inquiries or feedback should be directed to [our community on Discord](https://discord.gg/5rJgQTnV4s). We kindly request users to refrain from contacting or harassing the Ollama team regarding this project._

![Ollama Web UI Demo](./demo.gif)
@@ -35,6 +35,8 @@ Also check our sibling project, [OllamaHub](https://ollamahub.com/), where you c
- 📥🗑️ **Download/Delete Models**: Easily download or remove models directly from the web UI.
+- ⬆️ **GGUF File Model Creation**: Effortlessly create Ollama models by uploading GGUF files directly from the web UI, with options to upload from your machine or download GGUF files from Hugging Face (a client sketch follows this hunk).
- 🤖 **Multiple Model Support**: Seamlessly switch between different chat models for diverse interactions.
- 🔄 **Multi-Modal Support**: Seamlessly engage with models that support multimodal interactions, including images (e.g., LLava).
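As a minimal client sketch of the GGUF download flow this PR adds: it assumes a dev deployment at http://localhost:8080 (matching the WEBUI_BASE_URL constant introduced later in this diff), and the Hugging Face URL is the example noted in the new router code below.

```python
import requests

BASE_URL = "http://localhost:8080/api/v1"  # assumption: dev deployment from this PR
HF_URL = (
    "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF"
    "/resolve/main/stablelm-zephyr-3b.Q2_K.gguf"
)

# /utils/download streams progress as server-sent events while the backend
# pulls the GGUF file and registers it as a blob with Ollama.
with requests.get(f"{BASE_URL}/utils/download", params={"url": HF_URL}, stream=True) as r:
    for line in r.iter_lines():
        if line.startswith(b"data: "):
            print(line[len(b"data: "):].decode())
```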
...
__pycache__
.env
+_old
+uploads
\ No newline at end of file
from fastapi import FastAPI, Request, Depends, HTTPException
from fastapi.middleware.cors import CORSMiddleware
-from apps.web.routers import auths, users
+from apps.web.routers import auths, users, utils
from config import WEBUI_VERSION, WEBUI_AUTH

app = FastAPI()
@@ -19,6 +19,7 @@ app.add_middleware(
app.include_router(auths.router, prefix="/auths", tags=["auths"])
app.include_router(users.router, prefix="/users", tags=["users"])
+app.include_router(utils.router, prefix="/utils", tags=["utils"])

@app.get("/")
...
# apps/web/routers/utils.py (new file in this PR)

from fastapi import APIRouter, UploadFile, File, BackgroundTasks
from fastapi import Depends, HTTPException, status
from starlette.responses import StreamingResponse
from pydantic import BaseModel
from urllib.parse import urlparse

import requests
import os
import aiohttp
import json

from utils.misc import calculate_sha256
from config import OLLAMA_API_BASE_URL

router = APIRouter()


class UploadBlobForm(BaseModel):
    filename: str
def parse_huggingface_url(hf_url):
    try:
        # Parse the URL and split its path into components
        parsed_url = urlparse(hf_url)
        path_components = parsed_url.path.split("/")

        # Extract "user/repo" (currently unused) and the model file name
        user_repo = "/".join(path_components[1:3])
        model_file = path_components[-1]

        return model_file
    except ValueError:
        return None
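For reference, a minimal sketch of what this helper returns, using the example URL noted in the download route below:

```python
url = (
    "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF"
    "/resolve/main/stablelm-zephyr-3b.Q2_K.gguf"
)
parse_huggingface_url(url)  # -> "stablelm-zephyr-3b.Q2_K.gguf"
```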
async def download_file_stream(url, file_path, file_name, chunk_size=1024 * 1024):
    done = False

    # Resume a partial download if the file already exists locally
    if os.path.exists(file_path):
        current_size = os.path.getsize(file_path)
    else:
        current_size = 0

    headers = {"Range": f"bytes={current_size}-"} if current_size > 0 else {}

    timeout = aiohttp.ClientTimeout(total=600)  # Set the timeout

    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url, headers=headers) as response:
            # Assumes the server reports Content-Length for the remaining bytes
            total_size = int(response.headers.get("content-length", 0)) + current_size

            with open(file_path, "ab+") as file:
                async for data in response.content.iter_chunked(chunk_size):
                    current_size += len(data)
                    file.write(data)

                    done = current_size == total_size
                    progress = round((current_size / total_size) * 100, 2)
                    # Emit progress as a server-sent event
                    yield f'data: {{"progress": {progress}, "completed": {current_size}, "total": {total_size}}}\n\n'

                if done:
                    file.seek(0)
                    hashed = calculate_sha256(file)
                    file.seek(0)

                    # Register the downloaded file as a blob with Ollama
                    url = f"{OLLAMA_API_BASE_URL}/blobs/sha256:{hashed}"
                    response = requests.post(url, data=file)

                    if response.ok:
                        res = {
                            "done": done,
                            "blob": f"sha256:{hashed}",
                            "name": file_name,
                        }
                        os.remove(file_path)

                        yield f"data: {json.dumps(res)}\n\n"
                    else:
                        # Raising a string is invalid in Python 3; raise an Exception instead
                        raise Exception("Ollama: Could not create blob, please try again.")
@router.get("/download")
async def download(
url: str,
):
# url = "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q2_K.gguf"
file_name = parse_huggingface_url(url)
if file_name:
os.makedirs("./uploads", exist_ok=True)
file_path = os.path.join("./uploads", f"{file_name}")
return StreamingResponse(
download_file_stream(url, file_path, file_name),
media_type="text/event-stream",
)
else:
return None
@router.post("/upload")
async def upload(file: UploadFile = File(...)):
os.makedirs("./uploads", exist_ok=True)
file_path = os.path.join("./uploads", file.filename)
async def file_write_stream():
total = 0
total_size = file.size
chunk_size = 1024 * 1024
done = False
try:
with open(file_path, "wb+") as f:
while True:
chunk = file.file.read(chunk_size)
if not chunk:
break
f.write(chunk)
total += len(chunk)
done = total_size == total
progress = round((total / total_size) * 100, 2)
res = {
"progress": progress,
"total": total_size,
"completed": total,
}
yield f"data: {json.dumps(res)}\n\n"
if done:
f.seek(0)
hashed = calculate_sha256(f)
f.seek(0)
url = f"{OLLAMA_API_BASE_URL}/blobs/sha256:{hashed}"
response = requests.post(url, data=f)
if response.ok:
res = {
"done": done,
"blob": f"sha256:{hashed}",
"name": file.filename,
}
os.remove(file_path)
yield f"data: {json.dumps(res)}\n\n"
else:
raise "Ollama: Could not create blob, Please try again."
except Exception as e:
res = {"error": str(e)}
yield f"data: {json.dumps(res)}\n\n"
return StreamingResponse(file_write_stream(), media_type="text/event-stream")
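For completeness, a minimal client sketch for this upload route, under the same dev-deployment assumption as the download sketch above; the local GGUF file name here is hypothetical:

```python
import requests

BASE_URL = "http://localhost:8080/api/v1"  # assumption: dev deployment from this PR

# Stream the SSE progress events emitted by /utils/upload
with open("stablelm-zephyr-3b.Q2_K.gguf", "rb") as f:  # hypothetical local file
    with requests.post(f"{BASE_URL}/utils/upload", files={"file": f}, stream=True) as r:
        for line in r.iter_lines():
            if line.startswith(b"data: "):
                print(line[len(b"data: "):].decode())
```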
@@ -30,7 +30,7 @@ if ENV == "prod":
# WEBUI_VERSION
####################################

-WEBUI_VERSION = os.environ.get("WEBUI_VERSION", "v1.0.0-alpha.35")
+WEBUI_VERSION = os.environ.get("WEBUI_VERSION", "v1.0.0-alpha.40")

####################################
# WEBUI_AUTH
...
@@ -12,6 +12,7 @@ passlib[bcrypt]
uuid
requests
+aiohttp
pymongo
bcrypt
...
@@ -13,3 +13,11 @@ def get_gravatar_url(email):
    # Grab the actual image URL
    return f"https://www.gravatar.com/avatar/{hash_hex}?d=mp"
+
+def calculate_sha256(file):
+    sha256 = hashlib.sha256()
+    # Read the file in chunks to efficiently handle large files
+    for chunk in iter(lambda: file.read(8192), b""):
+        sha256.update(chunk)
+    return sha256.hexdigest()
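A small usage sketch, with a hypothetical file name; the helper hashes from the file's current position, which is why the router above seeks to the start before calling it:

```python
# Hypothetical usage of calculate_sha256 (file name is illustrative)
with open("stablelm-zephyr-3b.Q2_K.gguf", "rb") as f:
    digest = calculate_sha256(f)
print(f"sha256:{digest}")  # digest format used when registering the blob with Ollama
```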
@@ -8,7 +8,8 @@ export const OLLAMA_API_BASE_URL =
		: `http://localhost:11434/api`
	: PUBLIC_API_BASE_URL;

-export const WEBUI_API_BASE_URL = dev ? `http://${location.hostname}:8080/api/v1` : `/api/v1`;
+export const WEBUI_BASE_URL = dev ? `http://${location.hostname}:8080` : ``;
+export const WEBUI_API_BASE_URL = `${WEBUI_BASE_URL}/api/v1`;

export const WEB_UI_VERSION = 'v1.0.0-alpha-static';
...
@@ -245,6 +245,13 @@
				}
			} else {
				responseMessage.done = true;
+
+				if (responseMessage.content == '') {
+					responseMessage.error = true;
+					responseMessage.content = 'Oops! No text generated from Ollama. Please try again.';
+				}
+
				responseMessage.context = data.context ?? null;
				responseMessage.info = {
					total_duration: data.total_duration,
...
@@ -259,6 +259,13 @@
				}
			} else {
				responseMessage.done = true;
+
+				if (responseMessage.content == '') {
+					responseMessage.error = true;
+					responseMessage.content = 'Oops! No text generated from Ollama. Please try again.';
+				}
+
				responseMessage.context = data.context ?? null;
				responseMessage.info = {
					total_duration: data.total_duration,
...