Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
7054f028
Unverified
Commit
7054f028
authored
Jan 22, 2024
by
Timothy Jaeryang Baek
Committed by
GitHub
Jan 22, 2024
Browse files
Merge pull request #466 from baumandm/feat/epub-support
feat: Add epub support
parents
d517a3eb
5188bab5
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
38 additions
and
12 deletions
+38
-12
Dockerfile
Dockerfile
+7
-0
backend/apps/rag/main.py
backend/apps/rag/main.py
+13
-4
backend/constants.py
backend/constants.py
+2
-0
backend/requirements.txt
backend/requirements.txt
+2
-1
src/lib/components/chat/MessageInput.svelte
src/lib/components/chat/MessageInput.svelte
+13
-7
src/lib/constants.ts
src/lib/constants.ts
+1
-0
No files found.
Dockerfile
View file @
7054f028
...
...
@@ -45,6 +45,13 @@ COPY ./backend/requirements.txt ./requirements.txt
RUN
pip3
install
torch torchvision torchaudio
--index-url
https://download.pytorch.org/whl/cpu
--no-cache-dir
RUN
pip3
install
-r
requirements.txt
--no-cache-dir
# Install pandoc
# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
RUN
apt-get update
\
&&
apt-get
install
-y
pandoc
\
&&
rm
-rf
/var/lib/apt/lists/
*
# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
COPY
./backend .
...
...
backend/apps/rag/main.py
View file @
7054f028
...
...
@@ -19,6 +19,7 @@ from langchain_community.document_loaders import (
PyPDFLoader
,
CSVLoader
,
Docx2txtLoader
,
UnstructuredEPubLoader
,
UnstructuredWordDocumentLoader
,
UnstructuredMarkdownLoader
,
UnstructuredXMLLoader
,
...
...
@@ -187,6 +188,8 @@ def store_doc(
loader
=
TextLoader
(
file_path
)
elif
file_ext
in
octet_markdown
:
loader
=
UnstructuredMarkdownLoader
(
file_path
)
elif
file
.
content_type
==
"application/epub+zip"
:
loader
=
UnstructuredEPubLoader
(
file_path
)
else
:
loader
=
TextLoader
(
file_path
)
known_type
=
False
...
...
@@ -209,6 +212,12 @@ def store_doc(
)
except
Exception
as
e
:
print
(
e
)
if
"No pandoc was found"
in
str
(
e
):
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
PANDOC_NOT_INSTALLED
,
)
else
:
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
DEFAULT
(
e
),
...
...
backend/constants.py
View file @
7054f028
...
...
@@ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum):
USER_NOT_FOUND
=
"We could not find what you're looking for :/"
API_KEY_NOT_FOUND
=
"Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature."
MALICIOUS
=
"Unusual activities detected, please try again in a few minutes."
PANDOC_NOT_INSTALLED
=
"Pandoc is not installed on the server. Please contact your administrator for assistance."
backend/requirements.txt
View file @
7054f028
...
...
@@ -24,6 +24,7 @@ pypdf
docx2txt
unstructured
markdown
pypandoc
PyJWT
pyjwt[crypto]
...
...
src/lib/components/chat/MessageInput.svelte
View file @
7054f028
...
...
@@ -121,6 +121,7 @@
error: ''
};
try {
files = [...files, doc];
const res = await uploadDocToVectorDB(localStorage.token, '', file);
...
...
@@ -129,6 +130,11 @@
doc.collection_name = res.collection_name;
files = files;
}
} catch (e) {
// Remove the failed doc from the files array
files = files.filter((f) => f.name !== file.name);
toast.error(e);
}
};
onMount(() => {
...
...
src/lib/constants.ts
View file @
7054f028
...
...
@@ -12,6 +12,7 @@ export const WEB_UI_VERSION = 'v1.0.0-alpha-static';
export
const
REQUIRED_OLLAMA_VERSION
=
'
0.1.16
'
;
export
const
SUPPORTED_FILE_TYPE
=
[
'
application/epub+zip
'
,
'
application/pdf
'
,
'
text/plain
'
,
'
text/csv
'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment