Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
1633ba44
Unverified
Commit
1633ba44
authored
Jan 09, 2024
by
Timothy Jaeryang Baek
Committed by
GitHub
Jan 09, 2024
Browse files
Merge pull request #441 from ollama-webui/rag
feat: rag md support
parents
358f79f5
ffba59dc
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
1 deletion
+23
-1
backend/apps/rag/main.py
backend/apps/rag/main.py
+15
-0
backend/requirements.txt
backend/requirements.txt
+2
-0
src/lib/components/chat/MessageInput.svelte
src/lib/components/chat/MessageInput.svelte
+5
-1
src/lib/constants.ts
src/lib/constants.ts
+1
-0
No files found.
backend/apps/rag/main.py
View file @
1633ba44
...
...
@@ -19,6 +19,8 @@ from langchain_community.document_loaders import (
PyPDFLoader
,
CSVLoader
,
Docx2txtLoader
,
UnstructuredWordDocumentLoader
,
UnstructuredMarkdownLoader
,
)
from
langchain.text_splitter
import
RecursiveCharacterTextSplitter
from
langchain_community.vectorstores
import
Chroma
...
...
@@ -140,17 +142,27 @@ def store_doc(
):
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
print
(
file
.
content_type
)
if
file
.
content_type
not
in
[
"application/pdf"
,
"text/plain"
,
"text/csv"
,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
,
"application/octet-stream"
,
]:
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
FILE_NOT_SUPPORTED
,
)
if
file
.
content_type
==
"application/octet-stream"
and
file
.
filename
.
split
(
"."
)[
-
1
]
not
in
[
"md"
]:
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
FILE_NOT_SUPPORTED
,
)
try
:
filename
=
file
.
filename
file_path
=
f
"
{
UPLOAD_DIR
}
/
{
filename
}
"
...
...
@@ -175,6 +187,9 @@ def store_doc(
loader
=
TextLoader
(
file_path
)
elif
file
.
content_type
==
"text/csv"
:
loader
=
CSVLoader
(
file_path
)
elif
file
.
content_type
==
"application/octet-stream"
:
if
file
.
filename
.
split
(
"."
)[
-
1
]
==
"md"
:
loader
=
UnstructuredMarkdownLoader
(
file_path
)
data
=
loader
.
load
()
result
=
store_data_in_vector_db
(
data
,
collection_name
)
...
...
backend/requirements.txt
View file @
1633ba44
...
...
@@ -22,6 +22,8 @@ chromadb
sentence_transformers
pypdf
docx2txt
unstructured
markdown
PyJWT
pyjwt[crypto]
...
...
src/lib/components/chat/MessageInput.svelte
View file @
1633ba44
...
...
@@ -149,9 +149,13 @@
if (inputFiles && inputFiles.length > 0) {
const file = inputFiles[0];
console.log(file, file.name.split('.').at(-1));
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
reader.readAsDataURL(file);
} else if (SUPPORTED_FILE_TYPE.includes(file['type'])) {
} else if (
SUPPORTED_FILE_TYPE.includes(file['type']) ||
['md'].includes(file.name.split('.').at(-1))
) {
uploadDoc(file);
} else {
toast.error(`Unsupported File Type '${file['type']}'.`);
...
...
src/lib/constants.ts
View file @
1633ba44
...
...
@@ -14,6 +14,7 @@ export const REQUIRED_OLLAMA_VERSION = '0.1.16';
/**
 * MIME types that are treated as uploadable documents.
 *
 * The chat message input compares a selected file's `type` against this list
 * (after handling images separately) to decide whether to upload it as a
 * document.
 *
 * NOTE(review): the backend content-type allow-list shown in
 * backend/apps/rag/main.py for this commit does not include 'text/markdown';
 * confirm that files reported with that type are accepted server-side.
 */
export const SUPPORTED_FILE_TYPE = [
	'application/pdf',
	'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
	'text/markdown',
	'text/plain',
	'text/csv'
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment