Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
98948814
Commit
98948814
authored
Mar 10, 2024
by
Timothy J. Baek
Browse files
feat: toggle pdf ocr
parent
96ada232
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
138 additions
and
91 deletions
+138
-91
backend/apps/rag/main.py
backend/apps/rag/main.py
+24
-13
src/lib/apis/rag/index.ts
src/lib/apis/rag/index.ts
+15
-6
src/lib/components/documents/Settings/General.svelte
src/lib/components/documents/Settings/General.svelte
+99
-72
No files found.
backend/apps/rag/main.py
View file @
98948814
...
...
@@ -77,6 +77,7 @@ from constants import ERROR_MESSAGES
app
=
FastAPI
()
app
.
state
.
PDF_EXTRACT_IMAGES
=
False
app
.
state
.
CHUNK_SIZE
=
CHUNK_SIZE
app
.
state
.
CHUNK_OVERLAP
=
CHUNK_OVERLAP
app
.
state
.
RAG_TEMPLATE
=
RAG_TEMPLATE
...
...
@@ -184,12 +185,15 @@ async def update_embedding_model(
}
@
app
.
get
(
"/c
hunk
"
)
async
def
get_
chunk_params
(
user
=
Depends
(
get_admin_user
)):
@
app
.
get
(
"/c
onfig
"
)
async
def
get_
rag_config
(
user
=
Depends
(
get_admin_user
)):
return
{
"status"
:
True
,
"pdf_extract_images"
:
app
.
state
.
PDF_EXTRACT_IMAGES
,
"chunk"
:
{
"chunk_size"
:
app
.
state
.
CHUNK_SIZE
,
"chunk_overlap"
:
app
.
state
.
CHUNK_OVERLAP
,
},
}
...
...
@@ -198,17 +202,24 @@ class ChunkParamUpdateForm(BaseModel):
chunk_overlap
:
int
@
app
.
post
(
"/chunk/update"
)
async
def
update_chunk_params
(
form_data
:
ChunkParamUpdateForm
,
user
=
Depends
(
get_admin_user
)
):
app
.
state
.
CHUNK_SIZE
=
form_data
.
chunk_size
app
.
state
.
CHUNK_OVERLAP
=
form_data
.
chunk_overlap
class
ConfigUpdateForm
(
BaseModel
):
pdf_extract_images
:
bool
chunk
:
ChunkParamUpdateForm
@
app
.
post
(
"/config/update"
)
async
def
update_rag_config
(
form_data
:
ConfigUpdateForm
,
user
=
Depends
(
get_admin_user
)):
app
.
state
.
PDF_EXTRACT_IMAGES
=
form_data
.
pdf_extract_images
app
.
state
.
CHUNK_SIZE
=
form_data
.
chunk
.
chunk_size
app
.
state
.
CHUNK_OVERLAP
=
form_data
.
chunk
.
chunk_overlap
return
{
"status"
:
True
,
"pdf_extract_images"
:
app
.
state
.
PDF_EXTRACT_IMAGES
,
"chunk"
:
{
"chunk_size"
:
app
.
state
.
CHUNK_SIZE
,
"chunk_overlap"
:
app
.
state
.
CHUNK_OVERLAP
,
},
}
...
...
@@ -364,7 +375,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
]
if
file_ext
==
"pdf"
:
loader
=
PyPDFLoader
(
file_path
,
extract_images
=
True
)
loader
=
PyPDFLoader
(
file_path
,
extract_images
=
app
.
state
.
PDF_EXTRACT_IMAGES
)
elif
file_ext
==
"csv"
:
loader
=
CSVLoader
(
file_path
)
elif
file_ext
==
"rst"
:
...
...
src/lib/apis/rag/index.ts
View file @
98948814
import
{
RAG_API_BASE_URL
}
from
'
$lib/constants
'
;
export
const
get
ChunkParams
=
async
(
token
:
string
)
=>
{
export
const
get
RAGConfig
=
async
(
token
:
string
)
=>
{
let
error
=
null
;
const
res
=
await
fetch
(
`
${
RAG_API_BASE_URL
}
/c
hunk
`
,
{
const
res
=
await
fetch
(
`
${
RAG_API_BASE_URL
}
/c
onfig
`
,
{
method
:
'
GET
'
,
headers
:
{
'
Content-Type
'
:
'
application/json
'
,
...
...
@@ -27,18 +27,27 @@ export const getChunkParams = async (token: string) => {
return
res
;
};
export
const
updateChunkParams
=
async
(
token
:
string
,
size
:
number
,
overlap
:
number
)
=>
{
type
ChunkConfigForm
=
{
chunk_size
:
number
;
chunk_overlap
:
number
;
};
type
RAGConfigForm
=
{
pdf_extract_images
:
boolean
;
chunk
:
ChunkConfigForm
;
};
export
const
updateRAGConfig
=
async
(
token
:
string
,
payload
:
RAGConfigForm
)
=>
{
let
error
=
null
;
const
res
=
await
fetch
(
`
${
RAG_API_BASE_URL
}
/c
hunk
/update`
,
{
const
res
=
await
fetch
(
`
${
RAG_API_BASE_URL
}
/c
onfig
/update`
,
{
method
:
'
POST
'
,
headers
:
{
'
Content-Type
'
:
'
application/json
'
,
Authorization
:
`Bearer
${
token
}
`
},
body
:
JSON
.
stringify
({
chunk_size
:
size
,
chunk_overlap
:
overlap
...
payload
})
})
.
then
(
async
(
res
)
=>
{
...
...
src/lib/components/documents/Settings/General.svelte
View file @
98948814
<script lang="ts">
import { getDocs } from '$lib/apis/documents';
import {
getChunkParams,
getRAGConfig,
updateRAGConfig,
getQuerySettings,
scanDocs,
updateChunkParams,
updateQuerySettings
} from '$lib/apis/rag';
import { documents } from '$lib/stores';
...
...
@@ -17,6 +17,7 @@
let chunkSize = 0;
let chunkOverlap = 0;
let pdfExtractImages = true;
let querySettings = {
template: '',
...
...
@@ -35,16 +36,24 @@
};
const submitHandler = async () => {
const res = await updateChunkParams(localStorage.token, chunkSize, chunkOverlap);
const res = await updateRAGConfig(localStorage.token, {
pdf_extract_images: pdfExtractImages,
chunk: {
chunk_overlap: chunkOverlap,
chunk_size: chunkSize
}
});
querySettings = await updateQuerySettings(localStorage.token, querySettings);
};
onMount(async () => {
const res = await get
ChunkParams
(localStorage.token);
const res = await get
RAGConfig
(localStorage.token);
if (res) {
chunkSize = res.chunk_size;
chunkOverlap = res.chunk_overlap;
pdfExtractImages = res.pdf_extract_images;
chunkSize = res.chunk.chunk_size;
chunkOverlap = res.chunk.chunk_overlap;
}
querySettings = await getQuerySettings(localStorage.token);
...
...
@@ -124,14 +133,15 @@
<hr class=" dark:border-gray-700" />
<div class=" ">
<div class=" space-y-3">
<div class=" space-y-3">
<div class=" text-sm font-medium">Chunk Params</div>
<div class=" flex">
<div class=" flex w-full justify-between">
<div class=" flex
gap-2
">
<div class=" flex w-full justify-between
gap-2
">
<div class="self-center text-xs font-medium min-w-fit">Chunk Size</div>
<div class="self-center
p-3
">
<div class="self-center">
<input
class=" w-full rounded py-1.5 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none border border-gray-100 dark:border-gray-600"
type="number"
...
...
@@ -143,10 +153,10 @@
</div>
</div>
<div class="flex w-full">
<div class="flex w-full
gap-2
">
<div class=" self-center text-xs font-medium min-w-fit">Chunk Overlap</div>
<div class="self-center
p-3
">
<div class="self-center">
<input
class="w-full rounded py-1.5 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none border border-gray-100 dark:border-gray-600"
type="number"
...
...
@@ -159,13 +169,29 @@
</div>
</div>
<div>
<div class="flex justify-between items-center text-xs">
<div class=" text-xs font-medium">PDF Extract Images (OCR)</div>
<button
class=" text-xs font-medium text-gray-500"
type="button"
on:click={() => {
pdfExtractImages = !pdfExtractImages;
}}>{pdfExtractImages ? 'On' : 'Off'}</button
>
</div>
</div>
</div>
<div>
<div class=" text-sm font-medium">Query Params</div>
<div class=" flex">
<div class=" flex w-full justify-between">
<div class=" flex
py-2
">
<div class=" flex w-full justify-between
gap-2
">
<div class="self-center text-xs font-medium flex-1">Top K</div>
<div class="self-center
p-3
">
<div class="self-center">
<input
class=" w-full rounded py-1.5 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none border border-gray-100 dark:border-gray-600"
type="number"
...
...
@@ -203,6 +229,7 @@
</div>
</div>
</div>
</div>
<div class="flex justify-end pt-3 text-sm font-medium">
<button
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment