Unverified Commit 09534dad authored by Timothy Jaeryang Baek's avatar Timothy Jaeryang Baek Committed by GitHub
Browse files

Merge branch 'main' into patch-1

parents c4dd20b0 b5c10ff1
......@@ -5,9 +5,10 @@ FROM node:alpine as build
WORKDIR /app
# wget embedding model weight from alpine (does not exist from slim-buster)
RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz"
RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
tar -xzf - -C /app
COPY package.json package-lock.json ./
COPY package.json package-lock.json ./
RUN npm ci
COPY . .
......@@ -34,20 +35,17 @@ COPY ./backend/requirements.txt ./requirements.txt
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
RUN pip3 install -r requirements.txt --no-cache-dir
# Install pandoc
# Install pandoc and netcat
# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
RUN apt-get update \
&& apt-get install -y pandoc \
&& apt-get install -y pandoc netcat-openbsd \
&& rm -rf /var/lib/apt/lists/*
# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
# copy embedding weight from build
RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
COPY --from=build /app/onnx.tar.gz /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
RUN cd /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 &&\
tar -xzf onnx.tar.gz
COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
# copy built frontend files
COPY --from=build /app/build /app/build
......@@ -55,4 +53,4 @@ COPY --from=build /app/build /app/build
# copy backend files
COPY ./backend .
CMD [ "bash", "start.sh"]
CMD [ "bash", "start.sh"]
\ No newline at end of file
......@@ -24,6 +24,7 @@ from langchain_community.document_loaders import (
UnstructuredMarkdownLoader,
UnstructuredXMLLoader,
UnstructuredRSTLoader,
UnstructuredExcelLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
......@@ -36,7 +37,7 @@ from typing import Optional
import uuid
import time
from utils.misc import calculate_sha256
from utils.misc import calculate_sha256, calculate_sha256_string
from utils.utils import get_current_user
from config import UPLOAD_DIR, EMBED_MODEL, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP
from constants import ERROR_MESSAGES
......@@ -123,10 +124,15 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)):
try:
loader = WebBaseLoader(form_data.url)
data = loader.load()
store_data_in_vector_db(data, form_data.collection_name)
collection_name = form_data.collection_name
if collection_name == "":
collection_name = calculate_sha256_string(form_data.url)[:63]
store_data_in_vector_db(data, collection_name)
return {
"status": True,
"collection_name": form_data.collection_name,
"collection_name": collection_name,
"filename": form_data.url,
}
except Exception as e:
......@@ -137,6 +143,87 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)):
)
def get_loader(file, file_path):
    """Choose a document loader for an uploaded file.

    Branches are tried in order and the first match wins: a handful of
    extensions (pdf, csv, rst, xml, md), then EPUB by MIME type, then Word
    and Excel by MIME type or extension, then source-code extensions or any
    ``text/`` MIME type. Anything else falls back to ``TextLoader``.

    Args:
        file: Upload object; only its ``filename`` and ``content_type``
            attributes are read (presumably a FastAPI ``UploadFile`` --
            confirm against the caller).
        file_path: Location of the saved copy that the loader will read.

    Returns:
        A ``(loader, known_type)`` tuple. ``known_type`` is ``False`` only
        when no branch matched and the ``TextLoader`` fallback was used.
    """
    # Both attributes can be None on an upload object; normalising them to ""
    # keeps the string operations below from raising AttributeError and lets
    # such files reach the fallback branch instead.
    filename = file.filename or ""
    content_type = file.content_type or ""

    file_ext = filename.split(".")[-1].lower()
    known_type = True

    # Extensions that are treated as plain text source/config files.
    known_source_ext = {
        "go",
        "py",
        "java",
        "sh",
        "bat",
        "ps1",
        "cmd",
        "js",
        "ts",
        "css",
        "cpp",
        "hpp",
        "h",
        "c",
        "cs",
        "sql",
        "log",
        "ini",
        "pl",
        "pm",
        "r",
        "dart",
        "dockerfile",
        "env",
        "php",
        "hs",
        "hsc",
        "lua",
        "nginxconf",
        "conf",
        "m",
        "mm",
        "plsql",
        "perl",
        "rb",
        "rs",
        "db2",
        "scala",
        "bash",
        "swift",
        "vue",
        "svelte",
    }

    if file_ext == "pdf":
        loader = PyPDFLoader(file_path)
    elif file_ext == "csv":
        loader = CSVLoader(file_path)
    elif file_ext == "rst":
        loader = UnstructuredRSTLoader(file_path, mode="elements")
    elif file_ext == "xml":
        loader = UnstructuredXMLLoader(file_path)
    elif file_ext == "md":
        loader = UnstructuredMarkdownLoader(file_path)
    elif content_type == "application/epub+zip":
        loader = UnstructuredEPubLoader(file_path)
    elif (
        content_type
        == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        or file_ext in ["doc", "docx"]
    ):
        loader = Docx2txtLoader(file_path)
    elif content_type in [
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ] or file_ext in ["xls", "xlsx"]:
        loader = UnstructuredExcelLoader(file_path)
    elif file_ext in known_source_ext or "text/" in content_type:
        loader = TextLoader(file_path)
    else:
        # Unrecognised type: still try to read it as text, but tell the
        # caller that this was a guess.
        loader = TextLoader(file_path)
        known_type = False

    return loader, known_type
@app.post("/doc")
def store_doc(
collection_name: Optional[str] = Form(None),
......@@ -146,21 +233,6 @@ def store_doc(
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
print(file.content_type)
text_xml=["xml"]
octet_markdown=["md"]
known_source_ext=[
"go", "py", "java", "sh", "bat", "ps1", "cmd", "js", "ts",
"css", "cpp", "hpp","h", "c", "cs", "sql", "log", "ini",
"pl", "pm", "r", "dart", "dockerfile", "env", "php", "hs",
"hsc", "lua", "nginxconf", "conf", "m", "mm", "plsql", "perl",
"rb", "rs", "db2", "scala", "bash", "swift", "vue", "svelte"
]
docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
known_doc_ext=["doc","docx"]
file_ext=file.filename.split(".")[-1].lower()
known_type=True
try:
filename = file.filename
file_path = f"{UPLOAD_DIR}/{filename}"
......@@ -174,27 +246,7 @@ def store_doc(
collection_name = calculate_sha256(f)[:63]
f.close()
if file_ext=="pdf":
loader = PyPDFLoader(file_path)
elif (file.content_type ==docx_type or file_ext in known_doc_ext):
loader = Docx2txtLoader(file_path)
elif file_ext=="csv":
loader = CSVLoader(file_path)
elif file_ext=="rst":
loader = UnstructuredRSTLoader(file_path, mode="elements")
elif file_ext in text_xml:
loader=UnstructuredXMLLoader(file_path)
elif file_ext in known_source_ext or file.content_type.find("text/")>=0:
loader = TextLoader(file_path)
elif file_ext in octet_markdown:
loader = UnstructuredMarkdownLoader(file_path)
elif file.content_type == "application/epub+zip":
loader = UnstructuredEPubLoader(file_path)
else:
loader = TextLoader(file_path)
known_type=False
loader, known_type = get_loader(file, file_path)
data = loader.load()
result = store_data_in_vector_db(data, collection_name)
......@@ -203,7 +255,7 @@ def store_doc(
"status": True,
"collection_name": collection_name,
"filename": filename,
"known_type":known_type,
"known_type": known_type,
}
else:
raise HTTPException(
......
from peewee import *
from config import DATA_DIR
DB = SqliteDatabase("./data/ollama.db")
DB = SqliteDatabase(f"{DATA_DIR}/ollama.db")
DB.connect()
......@@ -63,6 +63,15 @@ class SigninForm(BaseModel):
password: str
class ProfileImageUrlForm(BaseModel):
    """Form model holding a single profile image URL."""

    profile_image_url: str
class UpdateProfileForm(BaseModel):
    """Form model for a profile update: the image URL and the user's name."""

    profile_image_url: str
    name: str
class UpdatePasswordForm(BaseModel):
password: str
new_password: str
......
......@@ -65,7 +65,7 @@ class UsersTable:
"name": name,
"email": email,
"role": role,
"profile_image_url": get_gravatar_url(email),
"profile_image_url": "/user.png",
"timestamp": int(time.time()),
}
)
......@@ -108,6 +108,20 @@ class UsersTable:
except:
return None
def update_user_profile_image_url_by_id(
    self, id: str, profile_image_url: str
) -> Optional[UserModel]:
    """Set a user's profile image URL and return the reloaded user.

    Args:
        id: Value matched against ``User.id``.
        profile_image_url: New value for the ``profile_image_url`` column.

    Returns:
        The user re-read after the update as a ``UserModel``, or ``None``
        if the update or the reload raised.
    """
    try:
        User.update(profile_image_url=profile_image_url).where(
            User.id == id
        ).execute()

        # Re-read the row so the returned model reflects what was stored.
        user = User.get(User.id == id)
        return UserModel(**model_to_dict(user))
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate; failures keep mapping to None.
        return None
def update_user_by_id(self, id: str, updated: dict) -> Optional[UserModel]:
try:
query = User.update(**updated).where(User.id == id)
......
......@@ -11,6 +11,7 @@ import uuid
from apps.web.models.auths import (
SigninForm,
SignupForm,
UpdateProfileForm,
UpdatePasswordForm,
UserResponse,
SigninResponse,
......@@ -40,14 +41,37 @@ async def get_session_user(user=Depends(get_current_user)):
}
############################
# Update Profile
############################
@router.post("/update/profile", response_model=UserResponse)
async def update_profile(
    form_data: UpdateProfileForm, session_user=Depends(get_current_user)
):
    """Update the session user's name and profile image URL.

    Responds with the updated user. Raises ``HTTPException`` 400 with
    ``ERROR_MESSAGES.INVALID_CRED`` when there is no session user, and 400
    with ``ERROR_MESSAGES.DEFAULT()`` when the update returns nothing.
    """
    if not session_user:
        raise HTTPException(400, detail=ERROR_MESSAGES.INVALID_CRED)

    user_id = session_user.id
    changes = {
        "profile_image_url": form_data.profile_image_url,
        "name": form_data.name,
    }
    updated_user = Users.update_user_by_id(user_id, changes)

    if not updated_user:
        raise HTTPException(400, detail=ERROR_MESSAGES.DEFAULT())
    return updated_user
############################
# Update Password
############################
@router.post("/update/password", response_model=bool)
async def update_password(form_data: UpdatePasswordForm,
session_user=Depends(get_current_user)):
async def update_password(
form_data: UpdatePasswordForm, session_user=Depends(get_current_user)
):
if session_user:
user = Auths.authenticate_user(session_user.email, form_data.password)
......@@ -93,18 +117,19 @@ async def signin(form_data: SigninForm):
async def signup(request: Request, form_data: SignupForm):
if not request.app.state.ENABLE_SIGNUP:
raise HTTPException(400, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
if not validate_email_format(form_data.email.lower()):
raise HTTPException(400, detail=ERROR_MESSAGES.INVALID_EMAIL_FORMAT)
if Users.get_user_by_email(form_data.email.lower()):
raise HTTPException(400, detail=ERROR_MESSAGES.EMAIL_TAKEN)
try:
role = "admin" if Users.get_num_users() == 0 else "pending"
hashed = get_password_hash(form_data.password)
user = Auths.insert_new_auth(form_data.email.lower(),
hashed, form_data.name, role)
user = Auths.insert_new_auth(
form_data.email.lower(), hashed, form_data.name, role
)
if user:
token = create_token(data={"email": user.email})
......@@ -120,11 +145,10 @@ async def signup(request: Request, form_data: SignupForm):
"profile_image_url": user.profile_image_url,
}
else:
raise HTTPException(
500, detail=ERROR_MESSAGES.CREATE_USER_ERROR)
raise HTTPException(500, detail=ERROR_MESSAGES.CREATE_USER_ERROR)
except Exception as err:
raise HTTPException(500,
detail=ERROR_MESSAGES.DEFAULT(err))
raise HTTPException(500, detail=ERROR_MESSAGES.DEFAULT(err))
############################
# ToggleSignUp
......
......@@ -9,9 +9,9 @@ import os
import aiohttp
import json
from utils.misc import calculate_sha256
from utils.misc import calculate_sha256, get_gravatar_url
from config import OLLAMA_API_BASE_URL
from config import OLLAMA_API_BASE_URL, DATA_DIR, UPLOAD_DIR
from constants import ERROR_MESSAGES
......@@ -96,8 +96,7 @@ async def download(
file_name = parse_huggingface_url(url)
if file_name:
os.makedirs("./uploads", exist_ok=True)
file_path = os.path.join("./uploads", f"{file_name}")
file_path = f"{UPLOAD_DIR}/{file_name}"
return StreamingResponse(
download_file_stream(url, file_path, file_name),
......@@ -109,8 +108,7 @@ async def download(
@router.post("/upload")
def upload(file: UploadFile = File(...)):
os.makedirs("./data/uploads", exist_ok=True)
file_path = os.path.join("./data/uploads", file.filename)
file_path = f"{UPLOAD_DIR}/{file.filename}"
# Save file in chunks
with open(file_path, "wb+") as f:
......@@ -167,3 +165,10 @@ def upload(file: UploadFile = File(...)):
yield f"data: {json.dumps(res)}\n\n"
return StreamingResponse(file_process_stream(), media_type="text/event-stream")
@router.get("/gravatar")
async def get_gravatar(email: str):
    """Return whatever ``get_gravatar_url`` produces for ``email``."""
    gravatar_url = get_gravatar_url(email)
    return gravatar_url
from dotenv import load_dotenv, find_dotenv
import os
import chromadb
from chromadb import Settings
from secrets import token_bytes
from base64 import b64encode
from constants import ERROR_MESSAGES
from pathlib import Path
load_dotenv(find_dotenv("../.env"))
try:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv("../.env"))
except ImportError:
print("dotenv not installed, skipping...")
####################################
# File Upload
# ENV (dev,test,prod)
####################################
ENV = os.environ.get("ENV", "dev")
UPLOAD_DIR = "./data/uploads"
Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
####################################
# DATA/FRONTEND BUILD DIR
####################################
DATA_DIR = str(Path(os.getenv("DATA_DIR", "./data")).resolve())
FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build")))
####################################
# ENV (dev,test,prod)
# File Upload DIR
####################################
ENV = os.environ.get("ENV", "dev")
UPLOAD_DIR = f"{DATA_DIR}/uploads"
Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
####################################
# OLLAMA_API_BASE_URL
......@@ -107,7 +110,7 @@ if WEBUI_AUTH and WEBUI_JWT_SECRET_KEY == "":
# RAG
####################################
CHROMA_DATA_PATH = "./data/vector_db"
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
EMBED_MODEL = "all-MiniLM-L6-v2"
CHROMA_CLIENT = chromadb.PersistentClient(
path=CHROMA_DATA_PATH, settings=Settings(allow_reset=True)
......
......@@ -14,7 +14,7 @@ from apps.openai.main import app as openai_app
from apps.web.main import app as webui_app
from apps.rag.main import app as rag_app
from config import ENV
from config import ENV, FRONTEND_BUILD_DIR
class SPAStaticFiles(StaticFiles):
......@@ -58,4 +58,8 @@ app.mount("/openai/api", openai_app)
app.mount("/rag/api/v1", rag_app)
app.mount("/", SPAStaticFiles(directory="../build", html=True), name="spa-static-files")
app.mount(
"/",
SPAStaticFiles(directory=FRONTEND_BUILD_DIR, html=True),
name="spa-static-files",
)
......@@ -25,6 +25,10 @@ docx2txt
unstructured
markdown
pypandoc
pandas
openpyxl
pyxlsb
xlrd
PyJWT
pyjwt[crypto]
......
......@@ -4,4 +4,4 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd "$SCRIPT_DIR" || exit
PORT="${PORT:-8080}"
uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
......@@ -24,6 +24,16 @@ def calculate_sha256(file):
return sha256.hexdigest()
def calculate_sha256_string(string):
    """Return the hexadecimal SHA-256 digest of ``string`` encoded as UTF-8."""
    encoded = string.encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()
def validate_email_format(email: str) -> bool:
if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
return False
......
# Security Policy
Our primary goal is to ensure the protection and confidentiality of sensitive data stored by users on ollama-webui.
## Supported Versions
| Version | Supported |
| ------- | ------------------ |
| main | :white_check_mark: |
| others | :x: |
## Reporting a Vulnerability
If you discover a security issue in ollama-webui, please notify us immediately by opening a pull request or contacting us on Discord.
## Product Security
We regularly audit our internal processes and system architecture for vulnerabilities using a combination of automated and manual testing techniques.
We plan to add static application security testing (SAST) and software composition analysis (SCA) scans to the project soon.
......@@ -11,8 +11,8 @@ TICK='\u2713'
# Detect GPU driver
get_gpu_driver() {
# Detect NVIDIA GPUs
if lspci | grep -i nvidia >/dev/null; then
# Detect NVIDIA GPUs using lspci or nvidia-smi
if lspci | grep -i nvidia >/dev/null || nvidia-smi >/dev/null 2>&1; then
echo "nvidia"
return
fi
......@@ -181,6 +181,9 @@ else
DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml"
export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable
fi
if [[ -n $webui_port ]]; then
export OLLAMA_WEBUI_PORT=$webui_port # Set OLLAMA_WEBUI_PORT environment variable
fi
DEFAULT_COMPOSE_COMMAND+=" up -d"
DEFAULT_COMPOSE_COMMAND+=" --remove-orphans"
DEFAULT_COMPOSE_COMMAND+=" --force-recreate"
......
......@@ -10,10 +10,10 @@ docker pull ollama/ollama:latest
docker_args="-d -v ollama:/root/.ollama -p $host_port:$container_port --name ollama ollama/ollama"
if [ "$use_gpu" == "y" ]; then
docker_args+=" --gpus=all"
if [ "$use_gpu" = "y" ]; then
docker_args="--gpus=all $docker_args"
fi
docker run "$docker_args"
docker run $docker_args
docker image prune -f
docker image prune -f
\ No newline at end of file
......@@ -89,6 +89,37 @@ export const userSignUp = async (name: string, email: string, password: string)
return res;
};
/**
 * POST a new name and profile image URL to `/auths/update/profile`.
 *
 * Resolves with the parsed JSON response. When the request fails and the
 * error carries a `detail` field, that `detail` is thrown; otherwise the
 * failure is logged and `null` is returned.
 */
export const updateUserProfile = async (token: string, name: string, profileImageUrl: string) => {
	let error = null;
	let res = null;

	try {
		const response = await fetch(`${WEBUI_API_BASE_URL}/auths/update/profile`, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				...(token && { authorization: `Bearer ${token}` })
			},
			body: JSON.stringify({
				name: name,
				profile_image_url: profileImageUrl
			})
		});

		// A non-2xx reply carries the error payload as JSON; surface it as the failure.
		if (!response.ok) throw await response.json();
		res = await response.json();
	} catch (err: any) {
		console.log(err);
		error = err.detail;
	}

	if (error) {
		throw error;
	}

	return res;
};
export const updateUserPassword = async (token: string, password: string, newPassword: string) => {
let error = null;
......
import { WEBUI_API_BASE_URL } from '$lib/constants';
/**
 * Ask the backend's `/utils/gravatar` endpoint for the Gravatar URL of `email`.
 *
 * Resolves with the parsed JSON response, or `null` when the request fails
 * (the failure is logged to the console, not thrown).
 */
export const getGravatarUrl = async (email: string) => {
	// Percent-encode the address: an unescaped `+` or `&` in a query string is
	// reinterpreted, so the server would see a different email.
	const query = `email=${encodeURIComponent(email)}`;

	const res = await fetch(`${WEBUI_API_BASE_URL}/utils/gravatar?${query}`, {
		method: 'GET',
		headers: {
			'Content-Type': 'application/json'
		}
	})
		.then(async (res) => {
			if (!res.ok) throw await res.json();
			return res.json();
		})
		.catch((err) => {
			console.log(err);
			return null;
		});

	return res;
};
......@@ -6,7 +6,7 @@
import Prompts from './MessageInput/PromptCommands.svelte';
import Suggestions from './MessageInput/Suggestions.svelte';
import { uploadDocToVectorDB } from '$lib/apis/rag';
import { uploadDocToVectorDB, uploadWebToVectorDB } from '$lib/apis/rag';
import AddFilesPlaceholder from '../AddFilesPlaceholder.svelte';
import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants';
import Documents from './MessageInput/Documents.svelte';
......@@ -137,6 +137,33 @@
}
};
const uploadWeb = async (url) => {
console.log(url);
const doc = {
type: 'doc',
name: url,
collection_name: '',
upload_status: false,
error: ''
};
try {
files = [...files, doc];
const res = await uploadWebToVectorDB(localStorage.token, '', url);
if (res) {
doc.upload_status = true;
doc.collection_name = res.collection_name;
files = files;
}
} catch (e) {
// Remove the failed doc from the files array
files = files.filter((f) => f.name !== url);
toast.error(e);
}
};
onMount(() => {
const dropZone = document.querySelector('body');
......@@ -258,6 +285,10 @@
<Documents
bind:this={documentsElement}
bind:prompt
on:url={(e) => {
console.log(e);
uploadWeb(e.detail);
}}
on:select={(e) => {
console.log(e);
files = [
......
......@@ -2,8 +2,9 @@
import { createEventDispatcher } from 'svelte';
import { documents } from '$lib/stores';
import { removeFirstHashWord } from '$lib/utils';
import { removeFirstHashWord, isValidHttpUrl } from '$lib/utils';
import { tick } from 'svelte';
import toast from 'svelte-french-toast';
export let prompt = '';
......@@ -37,9 +38,20 @@
chatInputElement?.focus();
await tick();
};
const confirmSelectWeb = async (url) => {
dispatch('url', url);
prompt = removeFirstHashWord(prompt);
const chatInputElement = document.getElementById('chat-textarea');
await tick();
chatInputElement?.focus();
await tick();
};
</script>
{#if filteredDocs.length > 0}
{#if filteredDocs.length > 0 || prompt.split(' ')?.at(0)?.substring(1).startsWith('http')}
<div class="md:px-2 mb-3 text-left w-full">
<div class="flex w-full rounded-lg border border-gray-100 dark:border-gray-700">
<div class=" bg-gray-100 dark:bg-gray-700 w-10 rounded-l-lg text-center">
......@@ -55,6 +67,7 @@
: ''}"
type="button"
on:click={() => {
console.log(doc);
confirmSelect(doc);
}}
on:mousemove={() => {
......@@ -71,6 +84,29 @@
</div>
</button>
{/each}
{#if prompt.split(' ')?.at(0)?.substring(1).startsWith('http')}
<button
class="px-3 py-1.5 rounded-lg w-full text-left bg-gray-100 selected-command-option-button"
type="button"
on:click={() => {
const url = prompt.split(' ')?.at(0)?.substring(1);
if (isValidHttpUrl(url)) {
confirmSelectWeb(url);
} else {
toast.error(
'Oops! Looks like the URL is invalid. Please double-check and try again.'
);
}
}}
>
<div class=" font-medium text-black line-clamp-1">
{prompt.split(' ')?.at(0)?.substring(1)}
</div>
<div class=" text-xs text-gray-600 line-clamp-1">Web</div>
</button>
{/if}
</div>
</div>
</div>
......
<script lang="ts">
import dayjs from 'dayjs';
import { marked } from 'marked';
import { settings, voices } from '$lib/stores';
import tippy from 'tippy.js';
import auto_render from 'katex/dist/contrib/auto-render.mjs';
import 'katex/dist/katex.min.css';
......@@ -116,6 +117,8 @@
} else {
speaking = true;
const speak = new SpeechSynthesisUtterance(message.content);
const voice = $voices?.filter((v) => v.name === $settings?.speakVoice)?.at(0) ?? undefined;
speak.voice = voice;
speechSynthesis.speak(speak);
}
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment