Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
c1ec604f
"src/tl_templates/vscode:/vscode.git/clone" did not exist on "eccdfe17cec7f3eb62c136c6b59ba00e20d799c9"
Commit
c1ec604f
authored
Jan 09, 2024
by
Timothy J. Baek
Browse files
feat: rag md support
parent
358f79f5
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
22 additions
and
1 deletion
+22
-1
backend/apps/rag/main.py
backend/apps/rag/main.py
+15
-0
backend/requirements.txt
backend/requirements.txt
+1
-0
src/lib/components/chat/MessageInput.svelte
src/lib/components/chat/MessageInput.svelte
+5
-1
src/lib/constants.ts
src/lib/constants.ts
+1
-0
No files found.
backend/apps/rag/main.py
View file @
c1ec604f
...
@@ -19,6 +19,8 @@ from langchain_community.document_loaders import (
...
@@ -19,6 +19,8 @@ from langchain_community.document_loaders import (
PyPDFLoader
,
PyPDFLoader
,
CSVLoader
,
CSVLoader
,
Docx2txtLoader
,
Docx2txtLoader
,
UnstructuredWordDocumentLoader
,
UnstructuredMarkdownLoader
,
)
)
from
langchain.text_splitter
import
RecursiveCharacterTextSplitter
from
langchain.text_splitter
import
RecursiveCharacterTextSplitter
from
langchain_community.vectorstores
import
Chroma
from
langchain_community.vectorstores
import
Chroma
...
@@ -140,17 +142,27 @@ def store_doc(
...
@@ -140,17 +142,27 @@ def store_doc(
):
):
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
print
(
file
.
content_type
)
if
file
.
content_type
not
in
[
if
file
.
content_type
not
in
[
"application/pdf"
,
"application/pdf"
,
"text/plain"
,
"text/plain"
,
"text/csv"
,
"text/csv"
,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
,
"application/octet-stream"
,
]:
]:
raise
HTTPException
(
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
FILE_NOT_SUPPORTED
,
detail
=
ERROR_MESSAGES
.
FILE_NOT_SUPPORTED
,
)
)
if
file
.
content_type
==
"application/octet-stream"
and
file
.
filename
.
split
(
"."
)[
-
1
]
not
in
[
"md"
]:
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
FILE_NOT_SUPPORTED
,
)
try
:
try
:
filename
=
file
.
filename
filename
=
file
.
filename
file_path
=
f
"
{
UPLOAD_DIR
}
/
{
filename
}
"
file_path
=
f
"
{
UPLOAD_DIR
}
/
{
filename
}
"
...
@@ -175,6 +187,9 @@ def store_doc(
...
@@ -175,6 +187,9 @@ def store_doc(
loader
=
TextLoader
(
file_path
)
loader
=
TextLoader
(
file_path
)
elif
file
.
content_type
==
"text/csv"
:
elif
file
.
content_type
==
"text/csv"
:
loader
=
CSVLoader
(
file_path
)
loader
=
CSVLoader
(
file_path
)
elif
file
.
content_type
==
"application/octet-stream"
:
if
file
.
filename
.
split
(
"."
)[
-
1
]
==
"md"
:
loader
=
UnstructuredMarkdownLoader
(
file_path
)
data
=
loader
.
load
()
data
=
loader
.
load
()
result
=
store_data_in_vector_db
(
data
,
collection_name
)
result
=
store_data_in_vector_db
(
data
,
collection_name
)
...
...
backend/requirements.txt
View file @
c1ec604f
...
@@ -22,6 +22,7 @@ chromadb
...
@@ -22,6 +22,7 @@ chromadb
sentence_transformers
sentence_transformers
pypdf
pypdf
docx2txt
docx2txt
unstructured
PyJWT
PyJWT
pyjwt[crypto]
pyjwt[crypto]
...
...
src/lib/components/chat/MessageInput.svelte
View file @
c1ec604f
...
@@ -149,9 +149,13 @@
...
@@ -149,9 +149,13 @@
if (inputFiles && inputFiles.length > 0) {
if (inputFiles && inputFiles.length > 0) {
const file = inputFiles[0];
const file = inputFiles[0];
console.log(file, file.name.split('.').at(-1));
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
reader.readAsDataURL(file);
reader.readAsDataURL(file);
} else if (SUPPORTED_FILE_TYPE.includes(file['type'])) {
} else if (
SUPPORTED_FILE_TYPE.includes(file['type']) ||
['md'].includes(file.name.split('.').at(-1))
) {
uploadDoc(file);
uploadDoc(file);
} else {
} else {
toast.error(`Unsupported File Type '${file['type']}'.`);
toast.error(`Unsupported File Type '${file['type']}'.`);
...
...
src/lib/constants.ts
View file @
c1ec604f
...
@@ -14,6 +14,7 @@ export const REQUIRED_OLLAMA_VERSION = '0.1.16';
...
@@ -14,6 +14,7 @@ export const REQUIRED_OLLAMA_VERSION = '0.1.16';
export
const
SUPPORTED_FILE_TYPE
=
[
export
const
SUPPORTED_FILE_TYPE
=
[
'
application/pdf
'
,
'
application/pdf
'
,
'
application/vnd.openxmlformats-officedocument.wordprocessingml.document
'
,
'
application/vnd.openxmlformats-officedocument.wordprocessingml.document
'
,
'
text/markdown
'
,
'
text/plain
'
,
'
text/plain
'
,
'
text/csv
'
'
text/csv
'
];
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment