Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
93fa9824
Unverified
Commit
93fa9824
authored
Jun 11, 2024
by
Timothy Jaeryang Baek
Committed by
GitHub
Jun 11, 2024
Browse files
Merge pull request #3033 from que-nguyen/dev
Fixed the issue where a single URL error disrupts the Web Search
parents
e7727aea
3bec60b8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
3 deletions
+26
-3
backend/apps/rag/main.py
backend/apps/rag/main.py
+26
-3
No files found.
backend/apps/rag/main.py
View file @
93fa9824
...
@@ -12,9 +12,10 @@ import os, shutil, logging, re
...
@@ -12,9 +12,10 @@ import os, shutil, logging, re
from
datetime
import
datetime
from
datetime
import
datetime
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
List
,
Union
,
Sequence
from
typing
import
List
,
Union
,
Sequence
,
Iterator
,
Any
from
chromadb.utils.batch_utils
import
create_batches
from
chromadb.utils.batch_utils
import
create_batches
from
langchain_core.documents
import
Document
from
langchain_community.document_loaders
import
(
from
langchain_community.document_loaders
import
(
WebBaseLoader
,
WebBaseLoader
,
...
@@ -701,7 +702,7 @@ def get_web_loader(url: Union[str, Sequence[str]], verify_ssl: bool = True):
...
@@ -701,7 +702,7 @@ def get_web_loader(url: Union[str, Sequence[str]], verify_ssl: bool = True):
# Check if the URL is valid
# Check if the URL is valid
if
not
validate_url
(
url
):
if
not
validate_url
(
url
):
raise
ValueError
(
ERROR_MESSAGES
.
INVALID_URL
)
raise
ValueError
(
ERROR_MESSAGES
.
INVALID_URL
)
return
WebBaseLoader
(
return
Safe
WebBaseLoader
(
url
,
url
,
verify_ssl
=
verify_ssl
,
verify_ssl
=
verify_ssl
,
requests_per_second
=
RAG_WEB_SEARCH_CONCURRENT_REQUESTS
,
requests_per_second
=
RAG_WEB_SEARCH_CONCURRENT_REQUESTS
,
...
@@ -1237,7 +1238,29 @@ def reset(user=Depends(get_admin_user)) -> bool:
...
@@ -1237,7 +1238,29 @@ def reset(user=Depends(get_admin_user)) -> bool:
return
True
return
True
class SafeWebBaseLoader(WebBaseLoader):
    """WebBaseLoader with enhanced error handling for URLs."""

    def lazy_load(self) -> Iterator[Document]:
        """Lazy load text from the url(s) in web_path with error handling.

        Each entry of ``self.web_paths`` is scraped independently. If
        scraping, text extraction, metadata extraction, or ``Document``
        construction raises, the error is logged and that URL is skipped so
        the remaining URLs are still processed.

        Yields:
            One ``Document`` per successfully loaded URL. Its metadata always
            contains ``"source"`` (the URL) and, when the corresponding tags
            are found in the page, ``"title"``, ``"description"`` and
            ``"language"``.
        """
        for path in self.web_paths:
            try:
                # NOTE(review): `_scrape`, `bs_kwargs` and `bs_get_text_kwargs`
                # are presumably provided by WebBaseLoader -- confirm.
                soup = self._scrape(path, bs_kwargs=self.bs_kwargs)
                text = soup.get_text(**self.bs_get_text_kwargs)

                # Build metadata
                metadata = {"source": path}
                if title := soup.find("title"):
                    metadata["title"] = title.get_text()
                if description := soup.find("meta", attrs={"name": "description"}):
                    metadata["description"] = description.get(
                        "content", "No description found."
                    )
                if html := soup.find("html"):
                    metadata["language"] = html.get("lang", "No language found.")

                document = Document(page_content=text, metadata=metadata)
            except Exception as e:
                # Log the error and continue with the next URL
                log.error(f"Error loading {path}: {e}")
                continue

            # Yield outside the try block: an exception thrown into the
            # generator by its consumer while it is suspended here must
            # propagate, not be logged as a failure to load this URL.
            yield document
if
ENV
==
"dev"
:
if
ENV
==
"dev"
:
@
app
.
get
(
"/ef"
)
@
app
.
get
(
"/ef"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment