Commit f5487628 authored by Morgan Blangeois
Browse files

Resolve merge conflicts in French translations

parents 2fedd91e 2c061777
...@@ -10,7 +10,8 @@ node_modules ...@@ -10,7 +10,8 @@ node_modules
vite.config.js.timestamp-* vite.config.js.timestamp-*
vite.config.ts.timestamp-* vite.config.ts.timestamp-*
__pycache__ __pycache__
.env .idea
venv
_old _old
uploads uploads
.ipynb_checkpoints .ipynb_checkpoints
......
...@@ -306,3 +306,4 @@ dist ...@@ -306,3 +306,4 @@ dist
# cypress artifacts # cypress artifacts
cypress/videos cypress/videos
cypress/screenshots cypress/screenshots
.vscode/settings.json
...@@ -14,7 +14,6 @@ from fastapi import ( ...@@ -14,7 +14,6 @@ from fastapi import (
from fastapi.responses import StreamingResponse, JSONResponse, FileResponse from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from faster_whisper import WhisperModel
from pydantic import BaseModel from pydantic import BaseModel
import uuid import uuid
...@@ -277,6 +276,8 @@ def transcribe( ...@@ -277,6 +276,8 @@ def transcribe(
f.close() f.close()
if app.state.config.STT_ENGINE == "": if app.state.config.STT_ENGINE == "":
from faster_whisper import WhisperModel
whisper_kwargs = { whisper_kwargs = {
"model_size_or_path": WHISPER_MODEL, "model_size_or_path": WHISPER_MODEL,
"device": whisper_device_type, "device": whisper_device_type,
......
...@@ -12,7 +12,6 @@ from fastapi import ( ...@@ -12,7 +12,6 @@ from fastapi import (
Form, Form,
) )
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from faster_whisper import WhisperModel
from constants import ERROR_MESSAGES from constants import ERROR_MESSAGES
from utils.utils import ( from utils.utils import (
......
...@@ -25,6 +25,7 @@ from utils.task import prompt_template ...@@ -25,6 +25,7 @@ from utils.task import prompt_template
from config import ( from config import (
SRC_LOG_LEVELS, SRC_LOG_LEVELS,
ENABLE_OPENAI_API, ENABLE_OPENAI_API,
AIOHTTP_CLIENT_TIMEOUT,
OPENAI_API_BASE_URLS, OPENAI_API_BASE_URLS,
OPENAI_API_KEYS, OPENAI_API_KEYS,
CACHE_DIR, CACHE_DIR,
...@@ -463,7 +464,9 @@ async def generate_chat_completion( ...@@ -463,7 +464,9 @@ async def generate_chat_completion(
streaming = False streaming = False
try: try:
session = aiohttp.ClientSession(trust_env=True) session = aiohttp.ClientSession(
trust_env=True, timeout=aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
)
r = await session.request( r = await session.request(
method="POST", method="POST",
url=f"{url}/chat/completions", url=f"{url}/chat/completions",
......
...@@ -48,8 +48,6 @@ import mimetypes ...@@ -48,8 +48,6 @@ import mimetypes
import uuid import uuid
import json import json
import sentence_transformers
from apps.webui.models.documents import ( from apps.webui.models.documents import (
Documents, Documents,
DocumentForm, DocumentForm,
...@@ -93,6 +91,8 @@ from config import ( ...@@ -93,6 +91,8 @@ from config import (
SRC_LOG_LEVELS, SRC_LOG_LEVELS,
UPLOAD_DIR, UPLOAD_DIR,
DOCS_DIR, DOCS_DIR,
CONTENT_EXTRACTION_ENGINE,
TIKA_SERVER_URL,
RAG_TOP_K, RAG_TOP_K,
RAG_RELEVANCE_THRESHOLD, RAG_RELEVANCE_THRESHOLD,
RAG_EMBEDDING_ENGINE, RAG_EMBEDDING_ENGINE,
...@@ -148,6 +148,9 @@ app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( ...@@ -148,6 +148,9 @@ app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
) )
app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
app.state.config.CHUNK_SIZE = CHUNK_SIZE app.state.config.CHUNK_SIZE = CHUNK_SIZE
app.state.config.CHUNK_OVERLAP = CHUNK_OVERLAP app.state.config.CHUNK_OVERLAP = CHUNK_OVERLAP
...@@ -190,6 +193,8 @@ def update_embedding_model( ...@@ -190,6 +193,8 @@ def update_embedding_model(
update_model: bool = False, update_model: bool = False,
): ):
if embedding_model and app.state.config.RAG_EMBEDDING_ENGINE == "": if embedding_model and app.state.config.RAG_EMBEDDING_ENGINE == "":
import sentence_transformers
app.state.sentence_transformer_ef = sentence_transformers.SentenceTransformer( app.state.sentence_transformer_ef = sentence_transformers.SentenceTransformer(
get_model_path(embedding_model, update_model), get_model_path(embedding_model, update_model),
device=DEVICE_TYPE, device=DEVICE_TYPE,
...@@ -204,6 +209,8 @@ def update_reranking_model( ...@@ -204,6 +209,8 @@ def update_reranking_model(
update_model: bool = False, update_model: bool = False,
): ):
if reranking_model: if reranking_model:
import sentence_transformers
app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder( app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder(
get_model_path(reranking_model, update_model), get_model_path(reranking_model, update_model),
device=DEVICE_TYPE, device=DEVICE_TYPE,
...@@ -388,6 +395,10 @@ async def get_rag_config(user=Depends(get_admin_user)): ...@@ -388,6 +395,10 @@ async def get_rag_config(user=Depends(get_admin_user)):
return { return {
"status": True, "status": True,
"pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES, "pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES,
"content_extraction": {
"engine": app.state.config.CONTENT_EXTRACTION_ENGINE,
"tika_server_url": app.state.config.TIKA_SERVER_URL,
},
"chunk": { "chunk": {
"chunk_size": app.state.config.CHUNK_SIZE, "chunk_size": app.state.config.CHUNK_SIZE,
"chunk_overlap": app.state.config.CHUNK_OVERLAP, "chunk_overlap": app.state.config.CHUNK_OVERLAP,
...@@ -417,6 +428,11 @@ async def get_rag_config(user=Depends(get_admin_user)): ...@@ -417,6 +428,11 @@ async def get_rag_config(user=Depends(get_admin_user)):
} }
class ContentExtractionConfig(BaseModel):
    """Content-extraction settings accepted as part of a RAG config update."""

    # Extraction engine identifier; defaults to the empty string.
    engine: str = ""
    # URL of the Tika server; optional, defaults to None.
    tika_server_url: Optional[str] = None
class ChunkParamUpdateForm(BaseModel): class ChunkParamUpdateForm(BaseModel):
chunk_size: int chunk_size: int
chunk_overlap: int chunk_overlap: int
...@@ -450,6 +466,7 @@ class WebConfig(BaseModel): ...@@ -450,6 +466,7 @@ class WebConfig(BaseModel):
class ConfigUpdateForm(BaseModel): class ConfigUpdateForm(BaseModel):
pdf_extract_images: Optional[bool] = None pdf_extract_images: Optional[bool] = None
content_extraction: Optional[ContentExtractionConfig] = None
chunk: Optional[ChunkParamUpdateForm] = None chunk: Optional[ChunkParamUpdateForm] = None
youtube: Optional[YoutubeLoaderConfig] = None youtube: Optional[YoutubeLoaderConfig] = None
web: Optional[WebConfig] = None web: Optional[WebConfig] = None
...@@ -463,6 +480,11 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ ...@@ -463,6 +480,11 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
else app.state.config.PDF_EXTRACT_IMAGES else app.state.config.PDF_EXTRACT_IMAGES
) )
if form_data.content_extraction is not None:
log.info(f"Updating text settings: {form_data.content_extraction}")
app.state.config.CONTENT_EXTRACTION_ENGINE = form_data.content_extraction.engine
app.state.config.TIKA_SERVER_URL = form_data.content_extraction.tika_server_url
if form_data.chunk is not None: if form_data.chunk is not None:
app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size
app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap
...@@ -499,6 +521,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ ...@@ -499,6 +521,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
return { return {
"status": True, "status": True,
"pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES, "pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES,
"content_extraction": {
"engine": app.state.config.CONTENT_EXTRACTION_ENGINE,
"tika_server_url": app.state.config.TIKA_SERVER_URL,
},
"chunk": { "chunk": {
"chunk_size": app.state.config.CHUNK_SIZE, "chunk_size": app.state.config.CHUNK_SIZE,
"chunk_overlap": app.state.config.CHUNK_OVERLAP, "chunk_overlap": app.state.config.CHUNK_OVERLAP,
...@@ -985,6 +1011,41 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b ...@@ -985,6 +1011,41 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b
return False return False
class TikaLoader:
    """Document loader that delegates text extraction to a Tika server.

    The file's bytes are sent with an HTTP PUT to the ``tika/text`` endpoint
    under ``app.state.config.TIKA_SERVER_URL`` and the extracted text is
    returned wrapped in a single ``Document``.
    """

    def __init__(self, file_path, mime_type=None):
        """Remember the file to extract and, optionally, its MIME type.

        Args:
            file_path: Path of the file whose content is sent to Tika.
            mime_type: Sent as the request's ``Content-Type`` when not None.
        """
        self.file_path = file_path
        self.mime_type = mime_type

    def load(self) -> List[Document]:
        """Extract the file's text through Tika.

        Returns:
            A one-element list holding a ``Document`` whose ``page_content`` is
            the text reported by Tika (or a placeholder when Tika reports
            none) and whose metadata carries the content type, when known.

        Raises:
            ValueError: If no Tika server URL is configured.
            Exception: If the Tika server answers with a non-success status.
        """
        endpoint = app.state.config.TIKA_SERVER_URL
        if not endpoint:
            # The URL is optional in the config; fail with a clear message
            # instead of an AttributeError on ``None.endswith``.
            raise ValueError("TIKA_SERVER_URL is not configured")
        if not endpoint.endswith("/"):
            endpoint += "/"
        endpoint += "tika/text"

        with open(self.file_path, "rb") as f:
            data = f.read()

        headers = {}
        if self.mime_type is not None:
            headers["Content-Type"] = self.mime_type

        r = requests.put(endpoint, data=data, headers=headers)
        if not r.ok:
            raise Exception(f"Error calling Tika: {r.reason}")

        raw_metadata = r.json()
        text = raw_metadata.get("X-TIKA:content", "<No text content found>")

        # Build the metadata from a copy so the request headers are not
        # mutated; the content type reported by Tika wins over the caller's.
        metadata = dict(headers)
        if "Content-Type" in raw_metadata:
            metadata["Content-Type"] = raw_metadata["Content-Type"]

        # DEBUG rather than INFO: this dumps the entire document text.
        log.debug("Tika extracted text: %s", text)

        return [Document(page_content=text, metadata=metadata)]
def get_loader(filename: str, file_content_type: str, file_path: str): def get_loader(filename: str, file_content_type: str, file_path: str):
file_ext = filename.split(".")[-1].lower() file_ext = filename.split(".")[-1].lower()
known_type = True known_type = True
...@@ -1035,47 +1096,58 @@ def get_loader(filename: str, file_content_type: str, file_path: str): ...@@ -1035,47 +1096,58 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
"msg", "msg",
] ]
if file_ext == "pdf": if (
loader = PyPDFLoader( app.state.config.CONTENT_EXTRACTION_ENGINE == "tika"
file_path, extract_images=app.state.config.PDF_EXTRACT_IMAGES and app.state.config.TIKA_SERVER_URL
)
elif file_ext == "csv":
loader = CSVLoader(file_path)
elif file_ext == "rst":
loader = UnstructuredRSTLoader(file_path, mode="elements")
elif file_ext == "xml":
loader = UnstructuredXMLLoader(file_path)
elif file_ext in ["htm", "html"]:
loader = BSHTMLLoader(file_path, open_encoding="unicode_escape")
elif file_ext == "md":
loader = UnstructuredMarkdownLoader(file_path)
elif file_content_type == "application/epub+zip":
loader = UnstructuredEPubLoader(file_path)
elif (
file_content_type
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
or file_ext in ["doc", "docx"]
): ):
loader = Docx2txtLoader(file_path) if file_ext in known_source_ext or (
elif file_content_type in [ file_content_type and file_content_type.find("text/") >= 0
"application/vnd.ms-excel", ):
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", loader = TextLoader(file_path, autodetect_encoding=True)
] or file_ext in ["xls", "xlsx"]: else:
loader = UnstructuredExcelLoader(file_path) loader = TikaLoader(file_path, file_content_type)
elif file_content_type in [
"application/vnd.ms-powerpoint",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
] or file_ext in ["ppt", "pptx"]:
loader = UnstructuredPowerPointLoader(file_path)
elif file_ext == "msg":
loader = OutlookMessageLoader(file_path)
elif file_ext in known_source_ext or (
file_content_type and file_content_type.find("text/") >= 0
):
loader = TextLoader(file_path, autodetect_encoding=True)
else: else:
loader = TextLoader(file_path, autodetect_encoding=True) if file_ext == "pdf":
known_type = False loader = PyPDFLoader(
file_path, extract_images=app.state.config.PDF_EXTRACT_IMAGES
)
elif file_ext == "csv":
loader = CSVLoader(file_path)
elif file_ext == "rst":
loader = UnstructuredRSTLoader(file_path, mode="elements")
elif file_ext == "xml":
loader = UnstructuredXMLLoader(file_path)
elif file_ext in ["htm", "html"]:
loader = BSHTMLLoader(file_path, open_encoding="unicode_escape")
elif file_ext == "md":
loader = UnstructuredMarkdownLoader(file_path)
elif file_content_type == "application/epub+zip":
loader = UnstructuredEPubLoader(file_path)
elif (
file_content_type
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
or file_ext in ["doc", "docx"]
):
loader = Docx2txtLoader(file_path)
elif file_content_type in [
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
] or file_ext in ["xls", "xlsx"]:
loader = UnstructuredExcelLoader(file_path)
elif file_content_type in [
"application/vnd.ms-powerpoint",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
] or file_ext in ["ppt", "pptx"]:
loader = UnstructuredPowerPointLoader(file_path)
elif file_ext == "msg":
loader = OutlookMessageLoader(file_path)
elif file_ext in known_source_ext or (
file_content_type and file_content_type.find("text/") >= 0
):
loader = TextLoader(file_path, autodetect_encoding=True)
else:
loader = TextLoader(file_path, autodetect_encoding=True)
known_type = False
return loader, known_type return loader, known_type
......
...@@ -294,14 +294,16 @@ def get_rag_context( ...@@ -294,14 +294,16 @@ def get_rag_context(
extracted_collections.extend(collection_names) extracted_collections.extend(collection_names)
context_string = "" contexts = []
citations = [] citations = []
for context in relevant_contexts: for context in relevant_contexts:
try: try:
if "documents" in context: if "documents" in context:
context_string += "\n\n".join( contexts.append(
[text for text in context["documents"][0] if text is not None] "\n\n".join(
[text for text in context["documents"][0] if text is not None]
)
) )
if "metadatas" in context: if "metadatas" in context:
...@@ -315,9 +317,7 @@ def get_rag_context( ...@@ -315,9 +317,7 @@ def get_rag_context(
except Exception as e: except Exception as e:
log.exception(e) log.exception(e)
context_string = context_string.strip() return contexts, citations
return context_string, citations
def get_model_path(model: str, update_model: bool = False): def get_model_path(model: str, update_model: bool = False):
...@@ -442,8 +442,6 @@ from langchain_core.documents import BaseDocumentCompressor, Document ...@@ -442,8 +442,6 @@ from langchain_core.documents import BaseDocumentCompressor, Document
from langchain_core.callbacks import Callbacks from langchain_core.callbacks import Callbacks
from langchain_core.pydantic_v1 import Extra from langchain_core.pydantic_v1 import Extra
from sentence_transformers import util
class RerankCompressor(BaseDocumentCompressor): class RerankCompressor(BaseDocumentCompressor):
embedding_function: Any embedding_function: Any
...@@ -468,6 +466,8 @@ class RerankCompressor(BaseDocumentCompressor): ...@@ -468,6 +466,8 @@ class RerankCompressor(BaseDocumentCompressor):
[(query, doc.page_content) for doc in documents] [(query, doc.page_content) for doc in documents]
) )
else: else:
from sentence_transformers import util
query_embedding = self.embedding_function(query) query_embedding = self.embedding_function(query)
document_embedding = self.embedding_function( document_embedding = self.embedding_function(
[doc.page_content for doc in documents] [doc.page_content for doc in documents]
......
...@@ -259,6 +259,9 @@ async def generate_function_chat_completion(form_data, user): ...@@ -259,6 +259,9 @@ async def generate_function_chat_completion(form_data, user):
if isinstance(line, BaseModel): if isinstance(line, BaseModel):
line = line.model_dump_json() line = line.model_dump_json()
line = f"data: {line}" line = f"data: {line}"
if isinstance(line, dict):
line = f"data: {json.dumps(line)}"
try: try:
line = line.decode("utf-8") line = line.decode("utf-8")
except: except:
......
...@@ -214,8 +214,7 @@ class FunctionsTable: ...@@ -214,8 +214,7 @@ class FunctionsTable:
user_settings["functions"]["valves"][id] = valves user_settings["functions"]["valves"][id] = valves
# Update the user settings in the database # Update the user settings in the database
query = Users.update_user_by_id(user_id, {"settings": user_settings}) Users.update_user_by_id(user_id, {"settings": user_settings})
query.execute()
return user_settings["functions"]["valves"][id] return user_settings["functions"]["valves"][id]
except Exception as e: except Exception as e:
......
...@@ -170,8 +170,7 @@ class ToolsTable: ...@@ -170,8 +170,7 @@ class ToolsTable:
user_settings["tools"]["valves"][id] = valves user_settings["tools"]["valves"][id] = valves
# Update the user settings in the database # Update the user settings in the database
query = Users.update_user_by_id(user_id, {"settings": user_settings}) Users.update_user_by_id(user_id, {"settings": user_settings})
query.execute()
return user_settings["tools"]["valves"][id] return user_settings["tools"]["valves"][id]
except Exception as e: except Exception as e:
......
...@@ -5,9 +5,8 @@ import importlib.metadata ...@@ -5,9 +5,8 @@ import importlib.metadata
import pkgutil import pkgutil
import chromadb import chromadb
from chromadb import Settings from chromadb import Settings
from base64 import b64encode
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from typing import TypeVar, Generic, Union from typing import TypeVar, Generic
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional from typing import Optional
...@@ -19,7 +18,6 @@ import markdown ...@@ -19,7 +18,6 @@ import markdown
import requests import requests
import shutil import shutil
from secrets import token_bytes
from constants import ERROR_MESSAGES from constants import ERROR_MESSAGES
#################################### ####################################
...@@ -768,12 +766,14 @@ class BannerModel(BaseModel): ...@@ -768,12 +766,14 @@ class BannerModel(BaseModel):
dismissible: bool dismissible: bool
timestamp: int timestamp: int
try:
banners = json.loads(os.environ.get("WEBUI_BANNERS", "[]"))
banners = [BannerModel(**banner) for banner in banners]
except Exception as e:
print(f"Error loading WEBUI_BANNERS: {e}")
banners = []
WEBUI_BANNERS = PersistentConfig( WEBUI_BANNERS = PersistentConfig("WEBUI_BANNERS", "ui.banners", banners)
"WEBUI_BANNERS",
"ui.banners",
[BannerModel(**banner) for banner in json.loads("[]")],
)
SHOW_ADMIN_DETAILS = PersistentConfig( SHOW_ADMIN_DETAILS = PersistentConfig(
...@@ -885,6 +885,22 @@ WEBUI_SESSION_COOKIE_SECURE = os.environ.get( ...@@ -885,6 +885,22 @@ WEBUI_SESSION_COOKIE_SECURE = os.environ.get(
if WEBUI_AUTH and WEBUI_SECRET_KEY == "": if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
raise ValueError(ERROR_MESSAGES.ENV_VAR_NOT_FOUND) raise ValueError(ERROR_MESSAGES.ENV_VAR_NOT_FOUND)
####################################
# RAG document content extraction
####################################
CONTENT_EXTRACTION_ENGINE = PersistentConfig(
"CONTENT_EXTRACTION_ENGINE",
"rag.CONTENT_EXTRACTION_ENGINE",
os.environ.get("CONTENT_EXTRACTION_ENGINE", "").lower(),
)
TIKA_SERVER_URL = PersistentConfig(
"TIKA_SERVER_URL",
"rag.tika_server_url",
os.getenv("TIKA_SERVER_URL", "http://tika:9998"), # Default for sidecar deployment
)
#################################### ####################################
# RAG # RAG
#################################### ####################################
......
This diff is collapsed.
...@@ -10,7 +10,7 @@ python-socketio==5.11.3 ...@@ -10,7 +10,7 @@ python-socketio==5.11.3
python-jose==3.3.0 python-jose==3.3.0
passlib[bcrypt]==1.7.4 passlib[bcrypt]==1.7.4
requests==2.32.2 requests==2.32.3
aiohttp==3.9.5 aiohttp==3.9.5
peewee==3.17.5 peewee==3.17.5
peewee-migrate==1.12.2 peewee-migrate==1.12.2
...@@ -30,21 +30,21 @@ openai ...@@ -30,21 +30,21 @@ openai
anthropic anthropic
google-generativeai==0.5.4 google-generativeai==0.5.4
langchain==0.2.0 langchain==0.2.6
langchain-community==0.2.0 langchain-community==0.2.6
langchain-chroma==0.1.2 langchain-chroma==0.1.2
fake-useragent==1.5.1 fake-useragent==1.5.1
chromadb==0.5.3 chromadb==0.5.3
sentence-transformers==2.7.0 sentence-transformers==3.0.1
pypdf==4.2.0 pypdf==4.2.0
docx2txt==0.8 docx2txt==0.8
python-pptx==0.6.23 python-pptx==0.6.23
unstructured==0.14.0 unstructured==0.14.9
Markdown==3.6 Markdown==3.6
pypandoc==1.13 pypandoc==1.13
pandas==2.2.2 pandas==2.2.2
openpyxl==3.1.2 openpyxl==3.1.5
pyxlsb==1.0.10 pyxlsb==1.0.10
xlrd==2.0.1 xlrd==2.0.1
validators==0.28.1 validators==0.28.1
...@@ -61,7 +61,7 @@ PyJWT[crypto]==2.8.0 ...@@ -61,7 +61,7 @@ PyJWT[crypto]==2.8.0
authlib==1.3.1 authlib==1.3.1
black==24.4.2 black==24.4.2
langfuse==2.33.0 langfuse==2.36.2
youtube-transcript-api==0.6.2 youtube-transcript-api==0.6.2
pytube==15.0.0 pytube==15.0.0
......
...@@ -8,14 +8,22 @@ import uuid ...@@ -8,14 +8,22 @@ import uuid
import time import time
def get_last_user_message_item(messages: List[dict]) -> dict:
    """Return the most recent message whose role is ``"user"``.

    Args:
        messages: Chat messages in chronological order; each is a dict that
            is expected to carry a ``"role"`` key.

    Returns:
        The last user message dict, or ``None`` when there is no user message.
    """
    for message in reversed(messages):
        # ``.get`` so a message without a role is skipped instead of raising.
        if message.get("role") == "user":
            return message
    return None


def get_last_user_message(messages: List[dict]) -> str:
    """Return the text of the most recent user message.

    When the message content is a list of parts, the text of the first part
    whose type is ``"text"`` is returned.

    Args:
        messages: Chat messages in chronological order.

    Returns:
        The text of the last user message, or ``None`` when there is no user
        message or its list content holds no text part.
    """
    message = get_last_user_message_item(messages)
    if message is None:
        return None

    content = message["content"]
    if not isinstance(content, list):
        return content

    for item in content:
        if item.get("type") == "text":
            return item["text"]

    # A list without any text part has no text to report; do not leak the
    # raw list to callers that expect a string.
    return None
......
{ {
"name": "open-webui", "name": "open-webui",
"version": "0.3.7", "version": "0.3.8",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "open-webui", "name": "open-webui",
"version": "0.3.7", "version": "0.3.8",
"dependencies": { "dependencies": {
"@codemirror/lang-javascript": "^6.2.2", "@codemirror/lang-javascript": "^6.2.2",
"@codemirror/lang-python": "^6.1.6", "@codemirror/lang-python": "^6.1.6",
......
{ {
"name": "open-webui", "name": "open-webui",
"version": "0.3.7", "version": "0.3.8",
"private": true, "private": true,
"scripts": { "scripts": {
"dev": "npm run pyodide:fetch && vite dev --host", "dev": "npm run pyodide:fetch && vite dev --host",
......
...@@ -32,6 +32,11 @@ type ChunkConfigForm = { ...@@ -32,6 +32,11 @@ type ChunkConfigForm = {
chunk_overlap: number; chunk_overlap: number;
}; };
type ContentExtractConfigForm = {
engine: string;
tika_server_url: string | null;
};
type YoutubeConfigForm = { type YoutubeConfigForm = {
language: string[]; language: string[];
translation?: string | null; translation?: string | null;
...@@ -40,6 +45,7 @@ type YoutubeConfigForm = { ...@@ -40,6 +45,7 @@ type YoutubeConfigForm = {
type RAGConfigForm = { type RAGConfigForm = {
pdf_extract_images?: boolean; pdf_extract_images?: boolean;
chunk?: ChunkConfigForm; chunk?: ChunkConfigForm;
content_extraction?: ContentExtractConfigForm;
web_loader_ssl_verification?: boolean; web_loader_ssl_verification?: boolean;
youtube?: YoutubeConfigForm; youtube?: YoutubeConfigForm;
}; };
......
...@@ -37,6 +37,10 @@ ...@@ -37,6 +37,10 @@
let embeddingModel = ''; let embeddingModel = '';
let rerankingModel = ''; let rerankingModel = '';
let contentExtractionEngine = 'default';
let tikaServerUrl = '';
let showTikaServerUrl = false;
let chunkSize = 0; let chunkSize = 0;
let chunkOverlap = 0; let chunkOverlap = 0;
let pdfExtractImages = true; let pdfExtractImages = true;
...@@ -163,11 +167,20 @@ ...@@ -163,11 +167,20 @@
rerankingModelUpdateHandler(); rerankingModelUpdateHandler();
} }
if (contentExtractionEngine === 'tika' && tikaServerUrl === '') {
toast.error($i18n.t('Tika Server URL required.'));
return;
}
const res = await updateRAGConfig(localStorage.token, { const res = await updateRAGConfig(localStorage.token, {
pdf_extract_images: pdfExtractImages, pdf_extract_images: pdfExtractImages,
chunk: { chunk: {
chunk_overlap: chunkOverlap, chunk_overlap: chunkOverlap,
chunk_size: chunkSize chunk_size: chunkSize
},
content_extraction: {
engine: contentExtractionEngine,
tika_server_url: tikaServerUrl
} }
}); });
...@@ -213,6 +226,10 @@ ...@@ -213,6 +226,10 @@
chunkSize = res.chunk.chunk_size; chunkSize = res.chunk.chunk_size;
chunkOverlap = res.chunk.chunk_overlap; chunkOverlap = res.chunk.chunk_overlap;
contentExtractionEngine = res.content_extraction.engine;
tikaServerUrl = res.content_extraction.tika_server_url;
showTikaServerUrl = contentExtractionEngine === 'tika';
} }
}); });
</script> </script>
...@@ -388,7 +405,7 @@ ...@@ -388,7 +405,7 @@
</div> </div>
</div> </div>
<hr class=" dark:border-gray-850 my-1" /> <hr class="dark:border-gray-850" />
<div class="space-y-2" /> <div class="space-y-2" />
<div> <div>
...@@ -562,6 +579,39 @@ ...@@ -562,6 +579,39 @@
<hr class=" dark:border-gray-850" /> <hr class=" dark:border-gray-850" />
<div class="">
<div class="text-sm font-medium">{$i18n.t('Content Extraction')}</div>
<div class="flex w-full justify-between mt-2">
<div class="self-center text-xs font-medium">{$i18n.t('Engine')}</div>
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={contentExtractionEngine}
on:change={(e) => {
showTikaServerUrl = e.target.value === 'tika';
}}
>
<option value="">{$i18n.t('Default')} </option>
<option value="tika">{$i18n.t('Tika')}</option>
</select>
</div>
</div>
{#if showTikaServerUrl}
<div class="flex w-full mt-2">
<div class="flex-1 mr-2">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('Enter Tika Server URL')}
bind:value={tikaServerUrl}
/>
</div>
</div>
{/if}
</div>
<hr class=" dark:border-gray-850" />
<div class=" "> <div class=" ">
<div class=" text-sm font-medium">{$i18n.t('Query Params')}</div> <div class=" text-sm font-medium">{$i18n.t('Query Params')}</div>
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
} from '$lib/apis'; } from '$lib/apis';
import Spinner from '$lib/components/common/Spinner.svelte'; import Spinner from '$lib/components/common/Spinner.svelte';
import Switch from '$lib/components/common/Switch.svelte';
const i18n: Writable<i18nType> = getContext('i18n'); const i18n: Writable<i18nType> = getContext('i18n');
...@@ -476,15 +477,40 @@ ...@@ -476,15 +477,40 @@
</div> </div>
{#if (valves[property] ?? null) !== null} {#if (valves[property] ?? null) !== null}
<div class="flex mt-0.5 space-x-2"> <!-- {valves[property]} -->
<div class="flex mt-0.5 mb-1.5 space-x-2">
<div class=" flex-1"> <div class=" flex-1">
<input {#if valves_spec.properties[property]?.enum ?? null}
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" <select
type="text" class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={valves_spec.properties[property].title} bind:value={valves[property]}
bind:value={valves[property]} >
autocomplete="off" {#each valves_spec.properties[property].enum as option}
/> <option value={option} selected={option === valves[property]}>
{option}
</option>
{/each}
</select>
{:else if (valves_spec.properties[property]?.type ?? null) === 'boolean'}
<div class="flex justify-between items-center">
<div class="text-xs text-gray-500">
{valves[property] ? 'Enabled' : 'Disabled'}
</div>
<div class=" pr-2">
<Switch bind:state={valves[property]} />
</div>
</div>
{:else}
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
type="text"
placeholder={valves_spec.properties[property].title}
bind:value={valves[property]}
autocomplete="off"
required
/>
{/if}
</div> </div>
</div> </div>
{/if} {/if}
......
...@@ -126,6 +126,27 @@ ...@@ -126,6 +126,27 @@
})(); })();
} }
// Handler for 'chat-events' socket messages: appends a status entry to the
// message the event targets. Events for any chat other than the one
// currently open are ignored.
const chatEventHandler = async (data) => {
	if (data.chat_id !== $chatId) {
		return;
	}

	await tick();

	const message = history.messages[data.message_id];
	if (!message) {
		// The event refers to a message that is not in the local history;
		// there is nothing to attach the status to.
		return;
	}

	const status = {
		done: data?.data?.done ?? null,
		description: data?.data?.status ?? null
	};

	if (message.statusHistory) {
		message.statusHistory.push(status);
	} else {
		message.statusHistory = [status];
	}

	// Self-assignment so the mutated message is picked up by Svelte's reactivity.
	messages = messages;
};
onMount(async () => { onMount(async () => {
const onMessageHandler = async (event) => { const onMessageHandler = async (event) => {
if (event.origin === window.origin) { if (event.origin === window.origin) {
...@@ -163,6 +184,8 @@ ...@@ -163,6 +184,8 @@
}; };
window.addEventListener('message', onMessageHandler); window.addEventListener('message', onMessageHandler);
$socket.on('chat-events', chatEventHandler);
if (!$chatId) { if (!$chatId) {
chatId.subscribe(async (value) => { chatId.subscribe(async (value) => {
if (!value) { if (!value) {
...@@ -177,6 +200,8 @@ ...@@ -177,6 +200,8 @@
return () => { return () => {
window.removeEventListener('message', onMessageHandler); window.removeEventListener('message', onMessageHandler);
$socket.off('chat-events');
}; };
}); });
...@@ -302,7 +327,7 @@ ...@@ -302,7 +327,7 @@
} }
}; };
const chatCompletedHandler = async (modelId, messages) => { const chatCompletedHandler = async (modelId, responseMessageId, messages) => {
await mermaid.run({ await mermaid.run({
querySelector: '.mermaid' querySelector: '.mermaid'
}); });
...@@ -316,7 +341,9 @@ ...@@ -316,7 +341,9 @@
info: m.info ? m.info : undefined, info: m.info ? m.info : undefined,
timestamp: m.timestamp timestamp: m.timestamp
})), })),
chat_id: $chatId chat_id: $chatId,
session_id: $socket?.id,
id: responseMessageId
}).catch((error) => { }).catch((error) => {
toast.error(error); toast.error(error);
messages.at(-1).error = { content: error }; messages.at(-1).error = { content: error };
...@@ -665,6 +692,7 @@ ...@@ -665,6 +692,7 @@
await tick(); await tick();
const [res, controller] = await generateChatCompletion(localStorage.token, { const [res, controller] = await generateChatCompletion(localStorage.token, {
stream: true,
model: model.id, model: model.id,
messages: messagesBody, messages: messagesBody,
options: { options: {
...@@ -682,8 +710,9 @@ ...@@ -682,8 +710,9 @@
keep_alive: $settings.keepAlive ?? undefined, keep_alive: $settings.keepAlive ?? undefined,
tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined, tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
files: files.length > 0 ? files : undefined, files: files.length > 0 ? files : undefined,
citations: files.length > 0 ? true : undefined, session_id: $socket?.id,
chat_id: $chatId chat_id: $chatId,
id: responseMessageId
}); });
if (res && res.ok) { if (res && res.ok) {
...@@ -704,7 +733,7 @@ ...@@ -704,7 +733,7 @@
controller.abort('User: Stop Response'); controller.abort('User: Stop Response');
} else { } else {
const messages = createMessagesList(responseMessageId); const messages = createMessagesList(responseMessageId);
await chatCompletedHandler(model.id, messages); await chatCompletedHandler(model.id, responseMessageId, messages);
} }
_response = responseMessage.content; _response = responseMessage.content;
...@@ -912,8 +941,8 @@ ...@@ -912,8 +941,8 @@
const [res, controller] = await generateOpenAIChatCompletion( const [res, controller] = await generateOpenAIChatCompletion(
localStorage.token, localStorage.token,
{ {
model: model.id,
stream: true, stream: true,
model: model.id,
stream_options: stream_options:
model.info?.meta?.capabilities?.usage ?? false model.info?.meta?.capabilities?.usage ?? false
? { ? {
...@@ -983,9 +1012,9 @@ ...@@ -983,9 +1012,9 @@
max_tokens: $settings?.params?.max_tokens ?? undefined, max_tokens: $settings?.params?.max_tokens ?? undefined,
tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined, tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
files: files.length > 0 ? files : undefined, files: files.length > 0 ? files : undefined,
citations: files.length > 0 ? true : undefined, session_id: $socket?.id,
chat_id: $chatId,
chat_id: $chatId id: responseMessageId
}, },
`${WEBUI_BASE_URL}/api` `${WEBUI_BASE_URL}/api`
); );
...@@ -1014,7 +1043,7 @@ ...@@ -1014,7 +1043,7 @@
} else { } else {
const messages = createMessagesList(responseMessageId); const messages = createMessagesList(responseMessageId);
await chatCompletedHandler(model.id, messages); await chatCompletedHandler(model.id, responseMessageId, messages);
} }
_response = responseMessage.content; _response = responseMessage.content;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment