Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
7e5e2c42
"tests/vscode:/vscode.git/clone" did not exist on "5786b0e2f7a76229781e40671abee5000a315e88"
Commit
7e5e2c42
authored
Mar 08, 2024
by
Timothy J. Baek
Browse files
refac: rag routes
parent
30503b59
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
103 additions
and
75 deletions
+103
-75
backend/apps/rag/main.py
backend/apps/rag/main.py
+14
-75
backend/apps/rag/utils.py
backend/apps/rag/utils.py
+89
-0
No files found.
backend/apps/rag/main.py
View file @
7e5e2c42
...
@@ -44,6 +44,8 @@ from apps.web.models.documents import (
...
@@ -44,6 +44,8 @@ from apps.web.models.documents import (
DocumentResponse
,
DocumentResponse
,
)
)
from
apps.rag.utils
import
query_doc
,
query_collection
from
utils.misc
import
(
from
utils.misc
import
(
calculate_sha256
,
calculate_sha256
,
calculate_sha256_string
,
calculate_sha256_string
,
...
@@ -248,21 +250,18 @@ class QueryDocForm(BaseModel):
...
@@ -248,21 +250,18 @@ class QueryDocForm(BaseModel):
@
app
.
post
(
"/query/doc"
)
@
app
.
post
(
"/query/doc"
)
def
query_doc
(
def
query_doc
_handler
(
form_data
:
QueryDocForm
,
form_data
:
QueryDocForm
,
user
=
Depends
(
get_current_user
),
user
=
Depends
(
get_current_user
),
):
):
try
:
try
:
# if you use docker use the model from the environment variable
return
query_doc
(
collection
=
CHROMA_CLIENT
.
get_collection
(
collection_name
=
form_data
.
collection_name
,
name
=
form_data
.
collection_name
,
query
=
form_data
.
query
,
k
=
form_data
.
k
if
form_data
.
k
else
app
.
state
.
TOP_K
,
embedding_function
=
app
.
state
.
sentence_transformer_ef
,
embedding_function
=
app
.
state
.
sentence_transformer_ef
,
)
)
result
=
collection
.
query
(
query_texts
=
[
form_data
.
query
],
n_results
=
form_data
.
k
if
form_data
.
k
else
app
.
state
.
TOP_K
,
)
return
result
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
raise
HTTPException
(
raise
HTTPException
(
...
@@ -277,76 +276,16 @@ class QueryCollectionsForm(BaseModel):
...
@@ -277,76 +276,16 @@ class QueryCollectionsForm(BaseModel):
k
:
Optional
[
int
]
=
None
k
:
Optional
[
int
]
=
None
def
merge_and_sort_query_results
(
query_results
,
k
):
# Initialize lists to store combined data
combined_ids
=
[]
combined_distances
=
[]
combined_metadatas
=
[]
combined_documents
=
[]
# Combine data from each dictionary
for
data
in
query_results
:
combined_ids
.
extend
(
data
[
"ids"
][
0
])
combined_distances
.
extend
(
data
[
"distances"
][
0
])
combined_metadatas
.
extend
(
data
[
"metadatas"
][
0
])
combined_documents
.
extend
(
data
[
"documents"
][
0
])
# Create a list of tuples (distance, id, metadata, document)
combined
=
list
(
zip
(
combined_distances
,
combined_ids
,
combined_metadatas
,
combined_documents
)
)
# Sort the list based on distances
combined
.
sort
(
key
=
lambda
x
:
x
[
0
])
# Unzip the sorted list
sorted_distances
,
sorted_ids
,
sorted_metadatas
,
sorted_documents
=
zip
(
*
combined
)
# Slicing the lists to include only k elements
sorted_distances
=
list
(
sorted_distances
)[:
k
]
sorted_ids
=
list
(
sorted_ids
)[:
k
]
sorted_metadatas
=
list
(
sorted_metadatas
)[:
k
]
sorted_documents
=
list
(
sorted_documents
)[:
k
]
# Create the output dictionary
merged_query_results
=
{
"ids"
:
[
sorted_ids
],
"distances"
:
[
sorted_distances
],
"metadatas"
:
[
sorted_metadatas
],
"documents"
:
[
sorted_documents
],
"embeddings"
:
None
,
"uris"
:
None
,
"data"
:
None
,
}
return
merged_query_results
@
app
.
post
(
"/query/collection"
)
@
app
.
post
(
"/query/collection"
)
def
query_collection
(
def
query_collection
_handler
(
form_data
:
QueryCollectionsForm
,
form_data
:
QueryCollectionsForm
,
user
=
Depends
(
get_current_user
),
user
=
Depends
(
get_current_user
),
):
):
results
=
[]
return
query_collection
(
collection_names
=
form_data
.
collection_names
,
for
collection_name
in
form_data
.
collection_names
:
query
=
form_data
.
query
,
try
:
k
=
form_data
.
k
if
form_data
.
k
else
app
.
state
.
TOP_K
,
# if you use docker use the model from the environment variable
embedding_function
=
app
.
state
.
sentence_transformer_ef
,
collection
=
CHROMA_CLIENT
.
get_collection
(
name
=
collection_name
,
embedding_function
=
app
.
state
.
sentence_transformer_ef
,
)
result
=
collection
.
query
(
query_texts
=
[
form_data
.
query
],
n_results
=
form_data
.
k
if
form_data
.
k
else
app
.
state
.
TOP_K
,
)
results
.
append
(
result
)
except
:
pass
return
merge_and_sort_query_results
(
results
,
form_data
.
k
if
form_data
.
k
else
app
.
state
.
TOP_K
)
)
...
...
backend/apps/rag/utils.py
0 → 100644
View file @
7e5e2c42
from
typing
import
List
from
config
import
CHROMA_CLIENT
def query_doc(collection_name: str, query: str, k: int, embedding_function):
    """Query a single collection and return the raw query result.

    Args:
        collection_name: Name of the collection to look up on ``CHROMA_CLIENT``.
        query: Query text; it is sent as the only entry of ``query_texts``.
        k: Value passed as ``n_results`` to the collection query.
        embedding_function: Embedding function handed to ``get_collection``.

    Returns:
        Whatever ``collection.query`` returns, unmodified.

    Raises:
        Any exception raised by ``get_collection`` or ``query`` propagates to
        the caller unchanged (the previous ``except Exception as e: raise e``
        wrapper did exactly that, so it was dropped as redundant).
    """
    # if you use docker use the model from the environment variable
    collection = CHROMA_CLIENT.get_collection(
        name=collection_name,
        embedding_function=embedding_function,
    )
    return collection.query(
        query_texts=[query],
        n_results=k,
    )
def merge_and_sort_query_results(query_results, k):
    """Merge several query results into one, ordered by ascending distance.

    Args:
        query_results: Iterable of result dicts. Each must provide the keys
            ``"ids"``, ``"distances"``, ``"metadatas"`` and ``"documents"``,
            and only the first element (index 0) of each value is read.
        k: Maximum number of merged entries to keep (used as a slice bound).

    Returns:
        A dict with the keys ``"ids"``, ``"distances"``, ``"metadatas"`` and
        ``"documents"``, each holding a single-element list wrapping the
        merged, sorted and truncated values, plus ``"embeddings"``, ``"uris"``
        and ``"data"`` set to ``None``. When there is nothing to merge, the
        wrapped lists are empty instead of raising ``ValueError``.
    """
    # Combine data from each dictionary, one flat list per field.
    combined_ids = [x for data in query_results for x in data["ids"][0]]
    combined_distances = [x for data in query_results for x in data["distances"][0]]
    combined_metadatas = [x for data in query_results for x in data["metadatas"][0]]
    combined_documents = [x for data in query_results for x in data["documents"][0]]

    # Rows are (distance, id, metadata, document). Sorting on the distance
    # only keeps the sort stable and avoids comparing metadata dicts on ties.
    ranked = sorted(
        zip(combined_distances, combined_ids, combined_metadatas, combined_documents),
        key=lambda row: row[0],
    )[:k]

    if ranked:
        # Unzip the kept rows back into per-field lists.
        sorted_distances, sorted_ids, sorted_metadatas, sorted_documents = (
            list(column) for column in zip(*ranked)
        )
    else:
        # zip(*[]) yields nothing, so a four-way unpack would fail here.
        sorted_distances, sorted_ids, sorted_metadatas, sorted_documents = (
            [],
            [],
            [],
            [],
        )

    return {
        "ids": [sorted_ids],
        "distances": [sorted_distances],
        "metadatas": [sorted_metadatas],
        "documents": [sorted_documents],
        "embeddings": None,
        "uris": None,
        "data": None,
    }
def query_collection(
    collection_names: List[str], query: str, k: int, embedding_function
):
    """Query several collections and merge their results into one.

    Each collection is queried through ``query_doc``. A collection whose
    lookup or query raises is skipped (best effort) and the failure is logged
    instead of being silently discarded.

    Args:
        collection_names: Names of the collections to query, in order.
        query: Query text forwarded to every collection.
        k: Number of results requested per collection, and the limit passed
            to ``merge_and_sort_query_results`` for the merged output.
        embedding_function: Embedding function forwarded to ``query_doc``.

    Returns:
        The value returned by ``merge_and_sort_query_results`` for the
        results that were collected successfully.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    log = logging.getLogger(__name__)

    results = []
    for collection_name in collection_names:
        try:
            results.append(
                query_doc(
                    collection_name=collection_name,
                    query=query,
                    k=k,
                    embedding_function=embedding_function,
                )
            )
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            log.warning(
                "Skipping collection %r: query failed",
                collection_name,
                exc_info=True,
            )

    return merge_and_sort_query_results(results, k)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment