Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
e2edbede
Unverified
Commit
e2edbede
authored
Jan 19, 2024
by
Marclass
Committed by
GitHub
Jan 19, 2024
Browse files
Merge pull request #1 from Marclass/rag-arbitrary-files
Allow any file to be used for RAG.
parents
6070e6bc
aa1d3860
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
27 additions
and
43 deletions
+27
-43
backend/apps/rag/main.py
backend/apps/rag/main.py
+18
-38
src/lib/components/chat/MessageInput.svelte
src/lib/components/chat/MessageInput.svelte
+5
-3
src/routes/(app)/documents/+page.svelte
src/routes/(app)/documents/+page.svelte
+4
-2
No files found.
backend/apps/rag/main.py
View file @
e2edbede
...
@@ -144,37 +144,21 @@ def store_doc(
...
@@ -144,37 +144,21 @@ def store_doc(
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
print
(
file
.
content_type
)
print
(
file
.
content_type
)
if
file
.
content_type
not
in
[
"application/pdf"
,
text_xml
=
[
"xml"
]
"text/plain"
,
"text/csv"
,
"text/xml"
,
"text/x-python"
,
"text/css"
,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
,
"application/octet-stream"
,
"application/x-javascript"
,
]:
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
FILE_NOT_SUPPORTED
,
)
text_xml
=
[
"text/xml"
]
octet_markdown
=
[
"md"
]
octet_markdown
=
[
"md"
]
octet_plain
=
[
known_source_ext
=
[
"go"
,
"py"
,
"java"
,
"sh"
,
"bat"
,
"ps1"
,
"cmd"
,
"js"
,
"go"
,
"py"
,
"java"
,
"sh"
,
"bat"
,
"ps1"
,
"cmd"
,
"js"
,
"css"
,
"cpp"
,
"hpp"
,
"h"
,
"c"
,
"cs"
,
"sql"
,
"log"
,
"ini"
,
"css"
,
"cpp"
,
"hpp"
,
"h"
,
"c"
,
"cs"
,
"sql"
,
"log"
,
"ini"
,
"pl"
"pm"
,
"r"
,
"dart"
,
"dockerfile"
,
"env"
,
"php"
,
"hs"
,
"pl"
"pm"
,
"r"
,
"dart"
,
"dockerfile"
,
"env"
,
"php"
,
"hs"
,
"hsc"
,
"lua"
,
"nginxconf"
,
"conf"
,
"m"
,
"mm"
,
"plsql"
,
"perl"
,
"hsc"
,
"lua"
,
"nginxconf"
,
"conf"
,
"m"
,
"mm"
,
"plsql"
,
"perl"
,
"rb"
,
"rs"
,
"db2"
,
"scala"
,
"bash"
,
"swift"
,
"vue"
,
"svelte"
"rb"
,
"rs"
,
"db2"
,
"scala"
,
"bash"
,
"swift"
,
"vue"
,
"svelte"
]
]
docx_type
=
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
known_doc_ext
=
[
"doc"
,
"docx"
]
file_ext
=
file
.
filename
.
split
(
"."
)[
-
1
].
lower
()
file_ext
=
file
.
filename
.
split
(
"."
)[
-
1
].
lower
()
if
file
.
content_type
==
"application/octet-stream"
and
file_ext
not
in
(
octet_markdown
+
octet_plain
):
known_type
=
True
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
FILE_NOT_SUPPORTED
,
)
try
:
try
:
filename
=
file
.
filename
filename
=
file
.
filename
file_path
=
f
"
{
UPLOAD_DIR
}
/
{
filename
}
"
file_path
=
f
"
{
UPLOAD_DIR
}
/
{
filename
}
"
...
@@ -188,27 +172,22 @@ def store_doc(
...
@@ -188,27 +172,22 @@ def store_doc(
collection_name
=
calculate_sha256
(
f
)[:
63
]
collection_name
=
calculate_sha256
(
f
)[:
63
]
f
.
close
()
f
.
close
()
if
file
.
content_type
==
"application/
pdf"
:
if
file
_ext
==
"
pdf"
:
loader
=
PyPDFLoader
(
file_path
)
loader
=
PyPDFLoader
(
file_path
)
elif
(
elif
(
file
.
content_type
==
docx_type
or
file_ext
in
known_doc_ext
):
file
.
content_type
==
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
):
loader
=
Docx2txtLoader
(
file_path
)
loader
=
Docx2txtLoader
(
file_path
)
elif
file_ext
==
"csv"
:
elif
file
.
content_type
==
"text/csv"
:
loader
=
CSVLoader
(
file_path
)
loader
=
CSVLoader
(
file_path
)
elif
file
.
content_type
in
text_xml
:
elif
file
_ext
in
text_xml
:
loader
=
UnstructuredXMLLoader
(
file_path
)
loader
=
UnstructuredXMLLoader
(
file_path
)
elif
file
.
content_type
==
"text/plain"
or
file
.
content_type
.
find
(
"text/"
)
>=
0
:
elif
file
_ext
in
known_source_ext
or
file
.
content_type
.
find
(
"text/"
)
>=
0
:
loader
=
TextLoader
(
file_path
)
loader
=
TextLoader
(
file_path
)
elif
file
.
content_type
==
"application/octet-stream"
:
elif
file_ext
in
octet_markdown
:
if
file_ext
in
octet_markdown
:
loader
=
UnstructuredMarkdownLoader
(
file_path
)
loader
=
UnstructuredMarkdownLoader
(
file_path
)
else
:
if
file_ext
in
octet_plain
:
loader
=
TextLoader
(
file_path
)
elif
file
.
content_type
==
"application/x-javascript"
:
loader
=
TextLoader
(
file_path
)
loader
=
TextLoader
(
file_path
)
known_type
=
False
data
=
loader
.
load
()
data
=
loader
.
load
()
result
=
store_data_in_vector_db
(
data
,
collection_name
)
result
=
store_data_in_vector_db
(
data
,
collection_name
)
...
@@ -218,6 +197,7 @@ def store_doc(
...
@@ -218,6 +197,7 @@ def store_doc(
"status"
:
True
,
"status"
:
True
,
"collection_name"
:
collection_name
,
"collection_name"
:
collection_name
,
"filename"
:
filename
,
"filename"
:
filename
,
"known_type"
:
known_type
,
}
}
else
:
else
:
raise
HTTPException
(
raise
HTTPException
(
...
...
src/lib/components/chat/MessageInput.svelte
View file @
e2edbede
...
@@ -173,7 +173,8 @@
...
@@ -173,7 +173,8 @@
) {
) {
uploadDoc(file);
uploadDoc(file);
} else {
} else {
toast.error(`Unsupported File Type '${file['type']}'.`);
toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
uploadDoc(file);
}
}
} else {
} else {
toast.error(`File not found.`);
toast.error(`File not found.`);
...
@@ -308,8 +309,9 @@
...
@@ -308,8 +309,9 @@
uploadDoc(file);
uploadDoc(file);
filesInputElement.value = '';
filesInputElement.value = '';
} else {
} else {
toast.error(`Unsupported File Type '${file['type']}'.`);
toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
inputFiles = null;
uploadDoc(file);
filesInputElement.value = '';
}
}
} else {
} else {
toast.error(`File not found.`);
toast.error(`File not found.`);
...
...
src/routes/(app)/documents/+page.svelte
View file @
e2edbede
...
@@ -73,7 +73,8 @@
...
@@ -73,7 +73,8 @@
) {
) {
uploadDoc(file);
uploadDoc(file);
} else {
} else {
toast.error(`Unsupported File Type '${file['type']}'.`);
toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
uploadDoc(file);
}
}
} else {
} else {
toast.error(`File not found.`);
toast.error(`File not found.`);
...
@@ -153,7 +154,8 @@
...
@@ -153,7 +154,8 @@
) {
) {
uploadDoc(file);
uploadDoc(file);
} else {
} else {
toast.error(`Unsupported File Type '${file['type']}'.`);
toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
uploadDoc(file);
}
}
inputFiles = null;
inputFiles = null;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment