Unverified Commit 6f3b2047 authored by Russell Bryant's avatar Russell Bryant Committed by GitHub
Browse files

[Core] Fix SSRF bypass via backslash-@ URL parsing inconsistency (#34743)


Signed-off-by: default avatarRussell Bryant <rbryant@redhat.com>
Co-authored-by: default avatarisotr0py <2037008807@qq.com>
parent 02e8f26c
...@@ -7,8 +7,10 @@ import mimetypes ...@@ -7,8 +7,10 @@ import mimetypes
import os import os
from tempfile import NamedTemporaryFile, TemporaryDirectory from tempfile import NamedTemporaryFile, TemporaryDirectory
import aiohttp
import numpy as np import numpy as np
import pytest import pytest
import requests
import torch import torch
from PIL import Image, ImageChops from PIL import Image, ImageChops
...@@ -318,3 +320,58 @@ async def test_allowed_media_domains(video_url: str, num_frames: int): ...@@ -318,3 +320,58 @@ async def test_allowed_media_domains(video_url: str, num_frames: int):
with pytest.raises(ValueError): with pytest.raises(ValueError):
_, _ = await connector.fetch_video_async(disallowed_url) _, _ = await connector.fetch_video_async(disallowed_url)
@pytest.mark.asyncio
async def test_ssrf_bypass_backslash_in_url(local_asset_server):
"""Verify that backslash-@ URL parsing confusion cannot bypass the
allowed_media_domains check (GHSA-v359-jj2v-j536).
urllib3.parse_url() and aiohttp/yarl disagree on how to parse a
backslash before ``@``. urllib3 treats ``\\`` as part of the path
(encoding it as ``%5C``), while yarl treats it as a userinfo
separator, changing the effective host. The fix normalises the URL
through urllib3 *before* handing it to aiohttp so both layers agree.
"""
port = local_asset_server.port
asset = TEST_IMAGE_ASSETS[0]
# Craft the bypass payload: urllib3 sees host=127.0.0.1, but an
# un-patched aiohttp would see host=example.com.
bypass_url = f"http://127.0.0.1:{port}\\@example.com/{asset}"
connector = MediaConnector(
allowed_media_domains=["127.0.0.1"],
)
# After the fix the request is made to 127.0.0.1 (the local asset
# server) using the normalised URL. The normalised path will be
# /%5C@example.com/<asset> which won't match any file the server
# knows about, so we expect an HTTP error — but crucially NOT a
# successful fetch from example.com.
with pytest.raises(requests.exceptions.HTTPError):
connector.fetch_image(bypass_url)
with pytest.raises(aiohttp.ClientResponseError):
await connector.fetch_image_async(bypass_url)
@pytest.mark.asyncio
async def test_ssrf_bypass_backslash_disallowed_domain():
"""The reverse direction: even when the *attacker-controlled* host
appears in the urllib3-parsed hostname position the allowlist must
still block it.
"""
# urllib3.parse_url sees host=example.com which is NOT in the
# allowlist, so this must be rejected before any request is made.
bypass_url = "https://example.com\\@safe.example.org/image.png"
connector = MediaConnector(
allowed_media_domains=["safe.example.org"],
)
with pytest.raises(ValueError, match="allowed domains"):
connector.fetch_image(bypass_url)
with pytest.raises(ValueError, match="allowed domains"):
await connector.fetch_image_async(bypass_url)
...@@ -146,7 +146,7 @@ class MediaConnector: ...@@ -146,7 +146,7 @@ class MediaConnector:
connection = self.connection connection = self.connection
data = connection.get_bytes( data = connection.get_bytes(
url, url_spec.url,
timeout=fetch_timeout, timeout=fetch_timeout,
allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS, allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
) )
...@@ -177,7 +177,7 @@ class MediaConnector: ...@@ -177,7 +177,7 @@ class MediaConnector:
connection = self.connection connection = self.connection
data = await connection.async_get_bytes( data = await connection.async_get_bytes(
url, url_spec.url,
timeout=fetch_timeout, timeout=fetch_timeout,
allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS, allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment