Commit 9d9a2b77 authored by Chenheli Hua, committed by simon-mo
Browse files

[Small] Prevent bypassing media domain restriction via HTTP redirects (#26035)


Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
Signed-off-by: simon-mo <simon.mo@hey.com>
parent 6040e0b6
...@@ -8,6 +8,9 @@ This page teaches you how to pass multi-modal inputs to [multi-modal models][sup ...@@ -8,6 +8,9 @@ This page teaches you how to pass multi-modal inputs to [multi-modal models][sup
!!! tip !!! tip
When serving multi-modal models, consider setting `--allowed-media-domains` to restrict the domains that vLLM can access, preventing it from reaching arbitrary endpoints that may be vulnerable to Server-Side Request Forgery (SSRF) attacks. You can provide a list of domains for this argument. For example: `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com` When serving multi-modal models, consider setting `--allowed-media-domains` to restrict the domains that vLLM can access, preventing it from reaching arbitrary endpoints that may be vulnerable to Server-Side Request Forgery (SSRF) attacks. You can provide a list of domains for this argument. For example: `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`
Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` so that HTTP redirects are not followed; otherwise a redirect served by an allowed domain can send the request to a domain outside the allowed list, bypassing the restriction.
This restriction is especially important if you run vLLM in a containerized environment where the vLLM pods may have unrestricted access to internal networks. This restriction is especially important if you run vLLM in a containerized environment where the vLLM pods may have unrestricted access to internal networks.
## Offline Inference ## Offline Inference
......
...@@ -66,6 +66,9 @@ Restrict domains that vLLM can access for media URLs by setting ...@@ -66,6 +66,9 @@ Restrict domains that vLLM can access for media URLs by setting
`--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks. `--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks.
(e.g. `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`) (e.g. `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`)
Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` to prevent HTTP
redirects from being followed to bypass domain restrictions.
## Security and Firewalls: Protecting Exposed vLLM Systems ## Security and Firewalls: Protecting Exposed vLLM Systems
While vLLM is designed to allow unsafe network services to be isolated to While vLLM is designed to allow unsafe network services to be isolated to
......
...@@ -54,6 +54,7 @@ class HTTPConnection: ...@@ -54,6 +54,7 @@ class HTTPConnection:
stream: bool = False, stream: bool = False,
timeout: Optional[float] = None, timeout: Optional[float] = None,
extra_headers: Optional[Mapping[str, str]] = None, extra_headers: Optional[Mapping[str, str]] = None,
allow_redirects: bool = True,
): ):
self._validate_http_url(url) self._validate_http_url(url)
...@@ -63,7 +64,8 @@ class HTTPConnection: ...@@ -63,7 +64,8 @@ class HTTPConnection:
return client.get(url, return client.get(url,
headers=self._headers(**extra_headers), headers=self._headers(**extra_headers),
stream=stream, stream=stream,
timeout=timeout) timeout=timeout,
allow_redirects=allow_redirects)
async def get_async_response( async def get_async_response(
self, self,
...@@ -71,6 +73,7 @@ class HTTPConnection: ...@@ -71,6 +73,7 @@ class HTTPConnection:
*, *,
timeout: Optional[float] = None, timeout: Optional[float] = None,
extra_headers: Optional[Mapping[str, str]] = None, extra_headers: Optional[Mapping[str, str]] = None,
allow_redirects: bool = True,
): ):
self._validate_http_url(url) self._validate_http_url(url)
...@@ -79,10 +82,17 @@ class HTTPConnection: ...@@ -79,10 +82,17 @@ class HTTPConnection:
return client.get(url, return client.get(url,
headers=self._headers(**extra_headers), headers=self._headers(**extra_headers),
timeout=timeout) timeout=timeout,
allow_redirects=allow_redirects)
def get_bytes(self, url: str, *, timeout: Optional[float] = None) -> bytes: def get_bytes(self,
with self.get_response(url, timeout=timeout) as r: url: str,
*,
timeout: Optional[float] = None,
allow_redirects: bool = True) -> bytes:
with self.get_response(url,
timeout=timeout,
allow_redirects=allow_redirects) as r:
r.raise_for_status() r.raise_for_status()
return r.content return r.content
...@@ -92,8 +102,10 @@ class HTTPConnection: ...@@ -92,8 +102,10 @@ class HTTPConnection:
url: str, url: str,
*, *,
timeout: Optional[float] = None, timeout: Optional[float] = None,
allow_redirects: bool = True,
) -> bytes: ) -> bytes:
async with await self.get_async_response(url, timeout=timeout) as r: async with await self.get_async_response(
url, timeout=timeout, allow_redirects=allow_redirects) as r:
r.raise_for_status() r.raise_for_status()
return await r.read() return await r.read()
......
...@@ -68,6 +68,7 @@ if TYPE_CHECKING: ...@@ -68,6 +68,7 @@ if TYPE_CHECKING:
VLLM_IMAGE_FETCH_TIMEOUT: int = 5 VLLM_IMAGE_FETCH_TIMEOUT: int = 5
VLLM_VIDEO_FETCH_TIMEOUT: int = 30 VLLM_VIDEO_FETCH_TIMEOUT: int = 30
VLLM_AUDIO_FETCH_TIMEOUT: int = 10 VLLM_AUDIO_FETCH_TIMEOUT: int = 10
VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True
VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8 VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25 VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
VLLM_VIDEO_LOADER_BACKEND: str = "opencv" VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
...@@ -725,6 +726,11 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -725,6 +726,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_AUDIO_FETCH_TIMEOUT": "VLLM_AUDIO_FETCH_TIMEOUT":
lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")), lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")),
# Whether to allow HTTP redirects when fetching from media URLs.
# Defaults to True (redirects are followed).
"VLLM_MEDIA_URL_ALLOW_REDIRECTS":
lambda: bool(int(os.getenv("VLLM_MEDIA_URL_ALLOW_REDIRECTS", "1"))),
# Max number of workers for the thread pool handling # Max number of workers for the thread pool handling
# media bytes loading. Set to 1 to disable parallel processing. # media bytes loading. Set to 1 to disable parallel processing.
# Default is 8 # Default is 8
......
...@@ -140,7 +140,11 @@ class MediaConnector: ...@@ -140,7 +140,11 @@ class MediaConnector:
self._assert_url_in_allowed_media_domains(url_spec) self._assert_url_in_allowed_media_domains(url_spec)
connection = self.connection connection = self.connection
data = connection.get_bytes(url, timeout=fetch_timeout) data = connection.get_bytes(
url,
timeout=fetch_timeout,
allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
)
return media_io.load_bytes(data) return media_io.load_bytes(data)
...@@ -167,7 +171,11 @@ class MediaConnector: ...@@ -167,7 +171,11 @@ class MediaConnector:
self._assert_url_in_allowed_media_domains(url_spec) self._assert_url_in_allowed_media_domains(url_spec)
connection = self.connection connection = self.connection
data = await connection.async_get_bytes(url, timeout=fetch_timeout) data = await connection.async_get_bytes(
url,
timeout=fetch_timeout,
allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
)
future = loop.run_in_executor(global_thread_pool, future = loop.run_in_executor(global_thread_pool,
media_io.load_bytes, data) media_io.load_bytes, data)
return await future return await future
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment