Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
999d2bc2
Commit
999d2bc2
authored
Jun 01, 2024
by
Timothy J. Baek
Browse files
refac: web search
parent
912a704f
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
119 additions
and
92 deletions
+119
-92
backend/apps/rag/main.py
backend/apps/rag/main.py
+77
-11
backend/apps/rag/utils.py
backend/apps/rag/utils.py
+1
-53
backend/constants.py
backend/constants.py
+1
-1
src/lib/apis/rag/index.ts
src/lib/apis/rag/index.ts
+12
-3
src/lib/components/chat/Chat.svelte
src/lib/components/chat/Chat.svelte
+28
-24
No files found.
backend/apps/rag/main.py
View file @
999d2bc2
...
...
@@ -59,9 +59,16 @@ from apps.rag.utils import (
query_doc_with_hybrid_search
,
query_collection
,
query_collection_with_hybrid_search
,
search_web
,
)
from
apps.rag.search.brave
import
search_brave
from
apps.rag.search.google_pse
import
search_google_pse
from
apps.rag.search.main
import
SearchResult
from
apps.rag.search.searxng
import
search_searxng
from
apps.rag.search.serper
import
search_serper
from
apps.rag.search.serpstack
import
search_serpstack
from
utils.misc
import
(
calculate_sha256
,
calculate_sha256_string
,
...
...
@@ -716,19 +723,78 @@ def resolve_hostname(hostname):
return
ipv4_addresses
,
ipv6_addresses
def
search_web
(
engine
:
str
,
query
:
str
)
->
list
[
SearchResult
]:
"""Search the web using a search engine and return the results as a list of SearchResult objects.
Will look for a search engine API key in environment variables in the following order:
- SEARXNG_QUERY_URL
- GOOGLE_PSE_API_KEY + GOOGLE_PSE_ENGINE_ID
- BRAVE_SEARCH_API_KEY
- SERPSTACK_API_KEY
- SERPER_API_KEY
Args:
query (str): The query to search for
"""
# TODO: add playwright to search the web
if
engine
==
"searxng"
:
if
app
.
state
.
config
.
SEARXNG_QUERY_URL
:
return
search_searxng
(
app
.
state
.
config
.
SEARXNG_QUERY_URL
,
query
)
else
:
raise
Exception
(
"No SEARXNG_QUERY_URL found in environment variables"
)
elif
engine
==
"google_pse"
:
if
(
app
.
state
.
config
.
GOOGLE_PSE_API_KEY
and
app
.
state
.
config
.
GOOGLE_PSE_ENGINE_ID
):
return
search_google_pse
(
app
.
state
.
config
.
GOOGLE_PSE_API_KEY
,
app
.
state
.
config
.
GOOGLE_PSE_ENGINE_ID
,
query
,
)
else
:
raise
Exception
(
"No GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID found in environment variables"
)
elif
engine
==
"brave"
:
if
app
.
state
.
config
.
BRAVE_SEARCH_API_KEY
:
return
search_brave
(
app
.
state
.
config
.
BRAVE_SEARCH_API_KEY
,
query
)
else
:
raise
Exception
(
"No BRAVE_SEARCH_API_KEY found in environment variables"
)
elif
engine
==
"serpstack"
:
if
app
.
state
.
config
.
SERPSTACK_API_KEY
:
return
search_serpstack
(
app
.
state
.
config
.
SERPSTACK_API_KEY
,
query
,
https_enabled
=
app
.
state
.
config
.
SERPSTACK_HTTPS
,
)
else
:
raise
Exception
(
"No SERPSTACK_API_KEY found in environment variables"
)
elif
engine
==
"serper"
:
if
app
.
state
.
config
.
SERPER_API_KEY
:
return
search_serper
(
app
.
state
.
config
.
SERPER_API_KEY
,
query
)
else
:
raise
Exception
(
"No SERPER_API_KEY found in environment variables"
)
else
:
raise
Exception
(
"No search engine API key found in environment variables"
)
@
app
.
post
(
"/web/search"
)
def
store_web_search
(
form_data
:
SearchForm
,
user
=
Depends
(
get_current_user
)):
try
:
try
:
web_results
=
search_web
(
app
.
state
.
config
.
RAG_WEB_SEARCH_ENGINE
,
form_data
.
query
)
except
Exception
as
e
:
log
.
exception
(
e
)
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
WEB_SEARCH_ERROR
,
)
web_results
=
search_web
(
app
.
state
.
config
.
RAG_WEB_SEARCH_ENGINE
,
form_data
.
query
)
except
Exception
as
e
:
log
.
exception
(
e
)
print
(
e
)
raise
HTTPException
(
status_code
=
status
.
HTTP_400_BAD_REQUEST
,
detail
=
ERROR_MESSAGES
.
WEB_SEARCH_ERROR
(
e
),
)
try
:
urls
=
[
result
.
link
for
result
in
web_results
]
loader
=
get_web_loader
(
urls
)
data
=
loader
.
load
()
...
...
backend/apps/rag/utils.py
View file @
999d2bc2
...
...
@@ -20,12 +20,7 @@ from langchain.retrievers import (
from
typing
import
Optional
from
apps.rag.search.brave
import
search_brave
from
apps.rag.search.google_pse
import
search_google_pse
from
apps.rag.search.main
import
SearchResult
from
apps.rag.search.searxng
import
search_searxng
from
apps.rag.search.serper
import
search_serper
from
apps.rag.search.serpstack
import
search_serpstack
from
config
import
(
SRC_LOG_LEVELS
,
CHROMA_CLIENT
,
...
...
@@ -536,50 +531,3 @@ class RerankCompressor(BaseDocumentCompressor):
)
final_results
.
append
(
doc
)
return
final_results
def
search_web
(
engine
:
str
,
query
:
str
)
->
list
[
SearchResult
]:
"""Search the web using a search engine and return the results as a list of SearchResult objects.
Will look for a search engine API key in environment variables in the following order:
- SEARXNG_QUERY_URL
- GOOGLE_PSE_API_KEY + GOOGLE_PSE_ENGINE_ID
- BRAVE_SEARCH_API_KEY
- SERPSTACK_API_KEY
- SERPER_API_KEY
Args:
query (str): The query to search for
"""
# TODO: add playwright to search the web
if
engine
==
"searxng"
:
if
SEARXNG_QUERY_URL
:
return
search_searxng
(
SEARXNG_QUERY_URL
,
query
)
else
:
raise
Exception
(
"No SEARXNG_QUERY_URL found in environment variables"
)
elif
engine
==
"google_pse"
:
if
GOOGLE_PSE_API_KEY
and
GOOGLE_PSE_ENGINE_ID
:
return
search_google_pse
(
GOOGLE_PSE_API_KEY
,
GOOGLE_PSE_ENGINE_ID
,
query
)
else
:
raise
Exception
(
"No GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID found in environment variables"
)
elif
engine
==
"brave"
:
if
BRAVE_SEARCH_API_KEY
:
return
search_brave
(
BRAVE_SEARCH_API_KEY
,
query
)
else
:
raise
Exception
(
"No BRAVE_SEARCH_API_KEY found in environment variables"
)
elif
engine
==
"serpstack"
:
if
SERPSTACK_API_KEY
:
return
search_serpstack
(
SERPSTACK_API_KEY
,
query
,
https_enabled
=
SERPSTACK_HTTPS
)
else
:
raise
Exception
(
"No SERPSTACK_API_KEY found in environment variables"
)
elif
engine
==
"serper"
:
if
SERPER_API_KEY
:
return
search_serper
(
SERPER_API_KEY
,
query
)
else
:
raise
Exception
(
"No SERPER_API_KEY found in environment variables"
)
else
:
raise
Exception
(
"No search engine API key found in environment variables"
)
backend/constants.py
View file @
999d2bc2
...
...
@@ -82,5 +82,5 @@ class ERROR_MESSAGES(str, Enum):
)
WEB_SEARCH_ERROR
=
(
"
Oops! Something went wrong while searching the web.
Please try again later.
"
lambda
err
=
""
:
f
"
{
err
if
err
else
'
Oops! Something went wrong while searching the web.
'
}
"
)
src/lib/apis/rag/index.ts
View file @
999d2bc2
...
...
@@ -518,8 +518,10 @@ export const runWebSearch = async (
token
:
string
,
query
:
string
,
collection_name
?:
string
):
Promise
<
SearchDocument
|
undefined
>
=>
{
return
await
fetch
(
`
${
RAG_API_BASE_URL
}
/web/search`
,
{
):
Promise
<
SearchDocument
|
null
>
=>
{
let
error
=
null
;
const
res
=
await
fetch
(
`
${
RAG_API_BASE_URL
}
/web/search`
,
{
method
:
'
POST
'
,
headers
:
{
'
Content-Type
'
:
'
application/json
'
,
...
...
@@ -536,8 +538,15 @@ export const runWebSearch = async (
})
.
catch
((
err
)
=>
{
console
.
log
(
err
);
return
undefined
;
error
=
err
.
detail
;
return
null
;
});
if
(
error
)
{
throw
error
;
}
return
res
;
};
export
interface
SearchDocument
{
...
...
src/lib/components/chat/Chat.svelte
View file @
999d2bc2
...
...
@@ -473,9 +473,34 @@
};
messages
=
messages
;
const
results
=
await
runWebSearch
(
localStorage
.
token
,
searchQuery
);
if
(
results
===
undefined
)
{
toast
.
warning
($
i18n
.
t
(
'No search results found'
));
const
results
=
await
runWebSearch
(
localStorage
.
token
,
searchQuery
).
catch
((
error
)
=>
{
console
.
log
(
error
);
toast
.
error
(
error
);
return
null
;
});
if
(
results
)
{
responseMessage
.
status
=
{
...
responseMessage
.
status
,
done
:
true
,
description
:
$
i18n
.
t
(
'Searched {{count}} sites'
,
{
count
:
results
.
filenames
.
length
}),
urls
:
results
.
filenames
};
if
(
responseMessage
?.
files
??
undefined
===
undefined
)
{
responseMessage
.
files
=
[];
}
responseMessage
.
files
.
push
({
collection_name
:
results
.
collection_name
,
name
:
searchQuery
,
type
:
'web_search_results'
,
urls
:
results
.
filenames
});
messages
=
messages
;
}
else
{
responseMessage
.
status
=
{
...
responseMessage
.
status
,
done
:
true
,
...
...
@@ -483,28 +508,7 @@
description
:
'No search results found'
};
messages
=
messages
;
return
;
}
responseMessage
.
status
=
{
...
responseMessage
.
status
,
done
:
true
,
description
:
$
i18n
.
t
(
'Searched {{count}} sites'
,
{
count
:
results
.
filenames
.
length
}),
urls
:
results
.
filenames
};
if
(
responseMessage
?.
files
??
undefined
===
undefined
)
{
responseMessage
.
files
=
[];
}
responseMessage
.
files
.
push
({
collection_name
:
results
.
collection_name
,
name
:
searchQuery
,
type
:
'web_search_results'
,
urls
:
results
.
filenames
});
messages
=
messages
;
};
const
sendPromptOllama
=
async
(
model
,
userPrompt
,
responseMessageId
,
_chatId
)
=>
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment