Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
7054f028
Unverified
Commit
7054f028
authored
Jan 22, 2024
by
Timothy Jaeryang Baek
Committed by
GitHub
Jan 22, 2024
Browse files
Merge pull request #466 from baumandm/feat/epub-support
feat: Add epub support
parents
d517a3eb
5188bab5
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
38 additions
and
12 deletions
+38
-12
Dockerfile
Dockerfile
+7
-0
backend/apps/rag/main.py
backend/apps/rag/main.py
+13
-4
backend/constants.py
backend/constants.py
+2
-0
backend/requirements.txt
backend/requirements.txt
+2
-1
src/lib/components/chat/MessageInput.svelte
src/lib/components/chat/MessageInput.svelte
+13
-7
src/lib/constants.ts
src/lib/constants.ts
+1
-0
No files found.
Dockerfile
View file @
7054f028
...
@@ -45,6 +45,13 @@ COPY ./backend/requirements.txt ./requirements.txt
...
@@ -45,6 +45,13 @@ COPY ./backend/requirements.txt ./requirements.txt
RUN
pip3
install
torch torchvision torchaudio
--index-url
https://download.pytorch.org/whl/cpu
--no-cache-dir
RUN
pip3
install
torch torchvision torchaudio
--index-url
https://download.pytorch.org/whl/cpu
--no-cache-dir
RUN
pip3
install
-r
requirements.txt
--no-cache-dir
RUN
pip3
install
-r
requirements.txt
--no-cache-dir
# Install pandoc
# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
RUN
apt-get update
\
&&
apt-get
install
-y
pandoc
\
&&
rm
-rf
/var/lib/apt/lists/
*
# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
COPY
./backend .
COPY
./backend .
...
...
backend/apps/rag/main.py
View file @
7054f028
...
@@ -19,6 +19,7 @@ from langchain_community.document_loaders import (
...
@@ -19,6 +19,7 @@ from langchain_community.document_loaders import (
PyPDFLoader
,
PyPDFLoader
,
CSVLoader
,
CSVLoader
,
Docx2txtLoader
,
Docx2txtLoader
,
UnstructuredEPubLoader
,
UnstructuredWordDocumentLoader
,
UnstructuredWordDocumentLoader
,
UnstructuredMarkdownLoader
,
UnstructuredMarkdownLoader
,
UnstructuredXMLLoader
,
UnstructuredXMLLoader
,
...
@@ -187,6 +188,8 @@ def store_doc(
...
@@ -187,6 +188,8 @@ def store_doc(
loader
=
TextLoader
(
file_path
)
loader
=
TextLoader
(
file_path
)
elif
file_ext
in
octet_markdown
:
elif
file_ext
in
octet_markdown
:
loader
=
UnstructuredMarkdownLoader
(
file_path
)
loader
=
UnstructuredMarkdownLoader
(
file_path
)
elif
file
.
content_type
==
"application/epub+zip"
:
loader
=
UnstructuredEPubLoader
(
file_path
)
else
:
else
:
loader
=
TextLoader
(
file_path
)
loader
=
TextLoader
(
file_path
)
known_type
=
False
known_type
=
False
...
@@ -209,10 +212,16 @@ def store_doc(
...
@@ -209,10 +212,16 @@ def store_doc(
)
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
raise
HTTPException
(
if
"No pandoc was found"
in
str
(
e
):
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
raise
HTTPException
(
detail
=
ERROR_MESSAGES
.
DEFAULT
(
e
),
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
)
detail
=
ERROR_MESSAGES
.
PANDOC_NOT_INSTALLED
,
)
else
:
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
DEFAULT
(
e
),
)
@
app
.
get
(
"/reset/db"
)
@
app
.
get
(
"/reset/db"
)
...
...
backend/constants.py
View file @
7054f028
...
@@ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum):
...
@@ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum):
USER_NOT_FOUND
=
"We could not find what you're looking for :/"
USER_NOT_FOUND
=
"We could not find what you're looking for :/"
API_KEY_NOT_FOUND
=
"Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature."
API_KEY_NOT_FOUND
=
"Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature."
MALICIOUS
=
"Unusual activities detected, please try again in a few minutes."
MALICIOUS
=
"Unusual activities detected, please try again in a few minutes."
PANDOC_NOT_INSTALLED
=
"Pandoc is not installed on the server. Please contact your administrator for assistance."
backend/requirements.txt
View file @
7054f028
...
@@ -24,8 +24,9 @@ pypdf
...
@@ -24,8 +24,9 @@ pypdf
docx2txt
docx2txt
unstructured
unstructured
markdown
markdown
pypandoc
PyJWT
PyJWT
pyjwt[crypto]
pyjwt[crypto]
black
black
\ No newline at end of file
src/lib/components/chat/MessageInput.svelte
View file @
7054f028
...
@@ -121,13 +121,19 @@
...
@@ -121,13 +121,19 @@
error: ''
error: ''
};
};
files = [...files, doc];
try {
const res = await uploadDocToVectorDB(localStorage.token, '', file);
files = [...files, doc];
const res = await uploadDocToVectorDB(localStorage.token, '', file);
if (res) {
doc.upload_status = true;
if (res) {
doc.collection_name = res.collection_name;
doc.upload_status = true;
files = files;
doc.collection_name = res.collection_name;
files = files;
}
} catch (e) {
// Remove the failed doc from the files array
files = files.filter((f) => f.name !== file.name);
toast.error(e);
}
}
};
};
...
...
src/lib/constants.ts
View file @
7054f028
...
@@ -12,6 +12,7 @@ export const WEB_UI_VERSION = 'v1.0.0-alpha-static';
...
@@ -12,6 +12,7 @@ export const WEB_UI_VERSION = 'v1.0.0-alpha-static';
export
const
REQUIRED_OLLAMA_VERSION
=
'
0.1.16
'
;
export
const
REQUIRED_OLLAMA_VERSION
=
'
0.1.16
'
;
export
const
SUPPORTED_FILE_TYPE
=
[
export
const
SUPPORTED_FILE_TYPE
=
[
'
application/epub+zip
'
,
'
application/pdf
'
,
'
application/pdf
'
,
'
text/plain
'
,
'
text/plain
'
,
'
text/csv
'
,
'
text/csv
'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment