Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
a3928656
"vscode:/vscode.git/clone" did not exist on "4338cc475029dcd37a291a867d52419122648e72"
Commit
a3928656
authored
Jul 01, 2024
by
Timothy J. Baek
Browse files
refac
parent
3c1ea243
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
40 additions
and
37 deletions
+40
-37
backend/apps/rag/main.py
backend/apps/rag/main.py
+18
-15
backend/config.py
backend/config.py
+5
-5
src/lib/apis/rag/index.ts
src/lib/apis/rag/index.ts
+2
-2
src/lib/components/admin/Settings/Documents.svelte
src/lib/components/admin/Settings/Documents.svelte
+15
-15
No files found.
backend/apps/rag/main.py
View file @
a3928656
...
...
@@ -91,7 +91,7 @@ from config import (
SRC_LOG_LEVELS
,
UPLOAD_DIR
,
DOCS_DIR
,
TE
X
T_EXTRACTION_ENGINE
,
CON
TE
N
T_EXTRACTION_ENGINE
,
TIKA_SERVER_URL
,
RAG_TOP_K
,
RAG_RELEVANCE_THRESHOLD
,
...
...
@@ -148,7 +148,7 @@ app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
)
app
.
state
.
config
.
TE
X
T_EXTRACTION_ENGINE
=
TE
X
T_EXTRACTION_ENGINE
app
.
state
.
config
.
CON
TE
N
T_EXTRACTION_ENGINE
=
CON
TE
N
T_EXTRACTION_ENGINE
app
.
state
.
config
.
TIKA_SERVER_URL
=
TIKA_SERVER_URL
app
.
state
.
config
.
CHUNK_SIZE
=
CHUNK_SIZE
...
...
@@ -395,8 +395,8 @@ async def get_rag_config(user=Depends(get_admin_user)):
return
{
"status"
:
True
,
"pdf_extract_images"
:
app
.
state
.
config
.
PDF_EXTRACT_IMAGES
,
"te
x
t_extraction"
:
{
"engine"
:
app
.
state
.
config
.
TE
X
T_EXTRACTION_ENGINE
,
"
con
te
n
t_extraction"
:
{
"engine"
:
app
.
state
.
config
.
CON
TE
N
T_EXTRACTION_ENGINE
,
"tika_server_url"
:
app
.
state
.
config
.
TIKA_SERVER_URL
,
},
"chunk"
:
{
...
...
@@ -428,7 +428,7 @@ async def get_rag_config(user=Depends(get_admin_user)):
}
class
Tex
tExtractionConfig
(
BaseModel
):
class
Conten
tExtractionConfig
(
BaseModel
):
engine
:
str
=
""
tika_server_url
:
Optional
[
str
]
=
None
...
...
@@ -466,7 +466,7 @@ class WebConfig(BaseModel):
class
ConfigUpdateForm
(
BaseModel
):
pdf_extract_images
:
Optional
[
bool
]
=
None
te
x
t_extraction
:
Optional
[
Tex
tExtractionConfig
]
=
None
con
te
n
t_extraction
:
Optional
[
Conten
tExtractionConfig
]
=
None
chunk
:
Optional
[
ChunkParamUpdateForm
]
=
None
youtube
:
Optional
[
YoutubeLoaderConfig
]
=
None
web
:
Optional
[
WebConfig
]
=
None
...
...
@@ -480,10 +480,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
else
app
.
state
.
config
.
PDF_EXTRACT_IMAGES
)
if
form_data
.
te
x
t_extraction
is
not
None
:
log
.
info
(
f
"Updating text settings:
{
form_data
.
te
x
t_extraction
}
"
)
app
.
state
.
config
.
TE
X
T_EXTRACTION_ENGINE
=
form_data
.
te
x
t_extraction
.
engine
app
.
state
.
config
.
TIKA_SERVER_URL
=
form_data
.
te
x
t_extraction
.
tika_server_url
if
form_data
.
con
te
n
t_extraction
is
not
None
:
log
.
info
(
f
"Updating text settings:
{
form_data
.
con
te
n
t_extraction
}
"
)
app
.
state
.
config
.
CON
TE
N
T_EXTRACTION_ENGINE
=
form_data
.
con
te
n
t_extraction
.
engine
app
.
state
.
config
.
TIKA_SERVER_URL
=
form_data
.
con
te
n
t_extraction
.
tika_server_url
if
form_data
.
chunk
is
not
None
:
app
.
state
.
config
.
CHUNK_SIZE
=
form_data
.
chunk
.
chunk_size
...
...
@@ -521,8 +521,8 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
return
{
"status"
:
True
,
"pdf_extract_images"
:
app
.
state
.
config
.
PDF_EXTRACT_IMAGES
,
"te
x
t_extraction"
:
{
"engine"
:
app
.
state
.
config
.
TE
X
T_EXTRACTION_ENGINE
,
"
con
te
n
t_extraction"
:
{
"engine"
:
app
.
state
.
config
.
CON
TE
N
T_EXTRACTION_ENGINE
,
"tika_server_url"
:
app
.
state
.
config
.
TIKA_SERVER_URL
,
},
"chunk"
:
{
...
...
@@ -1017,7 +1017,7 @@ class TikaLoader:
self
.
mime_type
=
mime_type
def
load
(
self
)
->
List
[
Document
]:
with
(
open
(
self
.
file_path
,
"rb"
)
as
f
)
:
with
open
(
self
.
file_path
,
"rb"
)
as
f
:
data
=
f
.
read
()
if
self
.
mime_type
is
not
None
:
...
...
@@ -1096,7 +1096,10 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
"msg"
,
]
if
app
.
state
.
config
.
TEXT_EXTRACTION_ENGINE
==
"tika"
and
app
.
state
.
config
.
TIKA_SERVER_URL
:
if
(
app
.
state
.
config
.
CONTENT_EXTRACTION_ENGINE
==
"tika"
and
app
.
state
.
config
.
TIKA_SERVER_URL
):
if
file_ext
in
known_source_ext
or
(
file_content_type
and
file_content_type
.
find
(
"text/"
)
>=
0
):
...
...
backend/config.py
View file @
a3928656
...
...
@@ -886,13 +886,13 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
raise
ValueError
(
ERROR_MESSAGES
.
ENV_VAR_NOT_FOUND
)
####################################
# RAG document text extraction
# RAG document content extraction
####################################
TE
X
T_EXTRACTION_ENGINE
=
PersistentConfig
(
"TE
X
T_EXTRACTION_ENGINE"
,
"rag.
text_extraction_engine
"
,
os
.
environ
.
get
(
"TE
X
T_EXTRACTION_ENGINE"
,
""
).
lower
()
CON
TE
N
T_EXTRACTION_ENGINE
=
PersistentConfig
(
"
CON
TE
N
T_EXTRACTION_ENGINE"
,
"rag.
CONTENT_EXTRACTION_ENGINE
"
,
os
.
environ
.
get
(
"
CON
TE
N
T_EXTRACTION_ENGINE"
,
""
).
lower
()
,
)
TIKA_SERVER_URL
=
PersistentConfig
(
...
...
src/lib/apis/rag/index.ts
View file @
a3928656
...
...
@@ -32,7 +32,7 @@ type ChunkConfigForm = {
chunk_overlap
:
number
;
};
type
Tex
tExtractConfigForm
=
{
type
Conten
tExtractConfigForm
=
{
engine
:
string
;
tika_server_url
:
string
|
null
;
};
...
...
@@ -45,7 +45,7 @@ type YoutubeConfigForm = {
type
RAGConfigForm
=
{
pdf_extract_images
?:
boolean
;
chunk
?:
ChunkConfigForm
;
te
x
t_extraction
?:
Tex
tExtractConfigForm
;
con
te
n
t_extraction
?:
Conten
tExtractConfigForm
;
web_loader_ssl_verification
?:
boolean
;
youtube
?:
YoutubeConfigForm
;
};
...
...
src/lib/components/admin/Settings/Documents.svelte
View file @
a3928656
...
...
@@ -37,7 +37,7 @@
let embeddingModel = '';
let rerankingModel = '';
let te
x
tExtractionEngine = 'default';
let
con
te
n
tExtractionEngine = 'default';
let tikaServerUrl = '';
let showTikaServerUrl = false;
...
...
@@ -167,7 +167,7 @@
rerankingModelUpdateHandler();
}
if (te
x
tExtractionEngine === 'tika' && tikaServerUrl === '') {
if (
con
te
n
tExtractionEngine === 'tika' && tikaServerUrl === '') {
toast.error($i18n.t('Tika Server URL required.'));
return;
}
...
...
@@ -178,8 +178,8 @@
chunk_overlap: chunkOverlap,
chunk_size: chunkSize
},
te
x
t_extraction: {
engine: te
x
tExtractionEngine,
con
te
n
t_extraction: {
engine:
con
te
n
tExtractionEngine,
tika_server_url: tikaServerUrl
}
});
...
...
@@ -227,9 +227,9 @@
chunkSize = res.chunk.chunk_size;
chunkOverlap = res.chunk.chunk_overlap;
te
x
tExtractionEngine = res.te
x
t_extraction.engine;
tikaServerUrl = res.te
x
t_extraction.tika_server_url;
showTikaServerUrl = te
x
tExtractionEngine === 'tika';
con
te
n
tExtractionEngine = res.
con
te
n
t_extraction.engine;
tikaServerUrl = res.
con
te
n
t_extraction.tika_server_url;
showTikaServerUrl =
con
te
n
tExtractionEngine === 'tika';
}
});
</script>
...
...
@@ -415,9 +415,9 @@
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={te
x
tExtractionEngine}
bind:value={
con
te
n
tExtractionEngine}
on:change={(e) => {
showTikaServerUrl =
(
e.target.value === 'tika'
)
;
showTikaServerUrl = e.target.value === 'tika';
}}
>
<option value="default">{$i18n.t('Default')}</option>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment