Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
5d3db15e
Unverified
Commit
5d3db15e
authored
Jun 12, 2024
by
Timothy Jaeryang Baek
Committed by
GitHub
Jun 12, 2024
Browse files
Merge pull request #3049 from que-nguyen/dev
Refactor URL validation function
parents
eead6906
eb7bba81
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
23 deletions
+7
-23
backend/apps/rag/main.py
backend/apps/rag/main.py
+7
-23
No files found.
backend/apps/rag/main.py
View file @
5d3db15e
...
@@ -8,6 +8,7 @@ from fastapi import (
...
@@ -8,6 +8,7 @@ from fastapi import (
Form
,
Form
,
)
)
from
fastapi.middleware.cors
import
CORSMiddleware
from
fastapi.middleware.cors
import
CORSMiddleware
import
requests
import
os
,
shutil
,
logging
,
re
import
os
,
shutil
,
logging
,
re
from
datetime
import
datetime
from
datetime
import
datetime
...
@@ -716,17 +717,12 @@ def validate_url(url: Union[str, Sequence[str]]):
...
@@ -716,17 +717,12 @@ def validate_url(url: Union[str, Sequence[str]]):
if
isinstance
(
validators
.
url
(
url
),
validators
.
ValidationError
):
if
isinstance
(
validators
.
url
(
url
),
validators
.
ValidationError
):
raise
ValueError
(
ERROR_MESSAGES
.
INVALID_URL
)
raise
ValueError
(
ERROR_MESSAGES
.
INVALID_URL
)
if
not
ENABLE_RAG_LOCAL_WEB_FETCH
:
if
not
ENABLE_RAG_LOCAL_WEB_FETCH
:
# Local web fetch is disabled, filter out any URLs that resolve to private IP addresses
# Check if the URL exists by making a HEAD request
parsed_url
=
urllib
.
parse
.
urlparse
(
url
)
try
:
# Get IPv4 and IPv6 addresses
response
=
requests
.
head
(
url
,
allow_redirects
=
True
)
ipv4_addresses
,
ipv6_addresses
=
resolve_hostname
(
parsed_url
.
hostname
)
if
response
.
status_code
!=
200
:
# Check if any of the resolved addresses are private
# This is technically still vulnerable to DNS rebinding attacks, as we don't control WebBaseLoader
for
ip
in
ipv4_addresses
:
if
validators
.
ipv4
(
ip
,
private
=
True
):
raise
ValueError
(
ERROR_MESSAGES
.
INVALID_URL
)
raise
ValueError
(
ERROR_MESSAGES
.
INVALID_URL
)
for
ip
in
ipv6_addresses
:
except
requests
.
exceptions
.
RequestException
:
if
validators
.
ipv6
(
ip
,
private
=
True
):
raise
ValueError
(
ERROR_MESSAGES
.
INVALID_URL
)
raise
ValueError
(
ERROR_MESSAGES
.
INVALID_URL
)
return
True
return
True
elif
isinstance
(
url
,
Sequence
):
elif
isinstance
(
url
,
Sequence
):
...
@@ -734,18 +730,6 @@ def validate_url(url: Union[str, Sequence[str]]):
...
@@ -734,18 +730,6 @@ def validate_url(url: Union[str, Sequence[str]]):
else
:
else
:
return
False
return
False
def
resolve_hostname
(
hostname
):
# Get address information
addr_info
=
socket
.
getaddrinfo
(
hostname
,
None
)
# Extract IP addresses from address information
ipv4_addresses
=
[
info
[
4
][
0
]
for
info
in
addr_info
if
info
[
0
]
==
socket
.
AF_INET
]
ipv6_addresses
=
[
info
[
4
][
0
]
for
info
in
addr_info
if
info
[
0
]
==
socket
.
AF_INET6
]
return
ipv4_addresses
,
ipv6_addresses
def
search_web
(
engine
:
str
,
query
:
str
)
->
list
[
SearchResult
]:
def
search_web
(
engine
:
str
,
query
:
str
)
->
list
[
SearchResult
]:
"""Search the web using a search engine and return the results as a list of SearchResult objects.
"""Search the web using a search engine and return the results as a list of SearchResult objects.
Will look for a search engine API key in environment variables in the following order:
Will look for a search engine API key in environment variables in the following order:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment