|
7 | 7 | import os |
8 | 8 | from tempfile import NamedTemporaryFile, TemporaryDirectory |
9 | 9 |
|
| 10 | +import aiohttp |
10 | 11 | import numpy as np |
11 | 12 | import pytest |
| 13 | +import requests |
12 | 14 | import torch |
13 | 15 | from PIL import Image, ImageChops |
14 | 16 |
|
@@ -318,3 +320,58 @@ async def test_allowed_media_domains(video_url: str, num_frames: int): |
318 | 320 |
|
319 | 321 | with pytest.raises(ValueError): |
320 | 322 | _, _ = await connector.fetch_video_async(disallowed_url) |
| 323 | + |
| 324 | + |
| 325 | +@pytest.mark.asyncio |
| 326 | +async def test_ssrf_bypass_backslash_in_url(local_asset_server): |
| 327 | + """Verify that backslash-@ URL parsing confusion cannot bypass the |
| 328 | + allowed_media_domains check (GHSA-v359-jj2v-j536). |
| 329 | +
|
| 330 | + urllib3.parse_url() and aiohttp/yarl disagree on how to parse a |
| 331 | + backslash before ``@``. urllib3 treats ``\\`` as part of the path |
| 332 | + (encoding it as ``%5C``), while yarl treats it as a userinfo |
| 333 | + separator, changing the effective host. The fix normalises the URL |
| 334 | + through urllib3 *before* handing it to aiohttp so both layers agree. |
| 335 | + """ |
| 336 | + port = local_asset_server.port |
| 337 | + asset = TEST_IMAGE_ASSETS[0] |
| 338 | + |
| 339 | + # Craft the bypass payload: urllib3 sees host=127.0.0.1, but an |
| 340 | + # un-patched aiohttp would see host=example.com. |
| 341 | + bypass_url = f"http://127.0.0.1:{port}\\@example.com/{asset}" |
| 342 | + |
| 343 | + connector = MediaConnector( |
| 344 | + allowed_media_domains=["127.0.0.1"], |
| 345 | + ) |
| 346 | + |
| 347 | + # After the fix the request is made to 127.0.0.1 (the local asset |
| 348 | + # server) using the normalised URL. The normalised path will be |
| 349 | + # /%5C@example.com/<asset> which won't match any file the server |
| 350 | + # knows about, so we expect an HTTP error — but crucially NOT a |
| 351 | + # successful fetch from example.com. |
| 352 | + with pytest.raises(requests.exceptions.HTTPError): |
| 353 | + connector.fetch_image(bypass_url) |
| 354 | + |
| 355 | + with pytest.raises(aiohttp.ClientResponseError): |
| 356 | + await connector.fetch_image_async(bypass_url) |
| 357 | + |
| 358 | + |
| 359 | +@pytest.mark.asyncio |
| 360 | +async def test_ssrf_bypass_backslash_disallowed_domain(): |
| 361 | + """The reverse direction: even when the *attacker-controlled* host |
| 362 | + appears in the urllib3-parsed hostname position the allowlist must |
| 363 | + still block it. |
| 364 | + """ |
| 365 | + # urllib3.parse_url sees host=example.com which is NOT in the |
| 366 | + # allowlist, so this must be rejected before any request is made. |
| 367 | + bypass_url = "https://example.com\\@safe.example.org/image.png" |
| 368 | + |
| 369 | + connector = MediaConnector( |
| 370 | + allowed_media_domains=["safe.example.org"], |
| 371 | + ) |
| 372 | + |
| 373 | + with pytest.raises(ValueError, match="allowed domains"): |
| 374 | + connector.fetch_image(bypass_url) |
| 375 | + |
| 376 | + with pytest.raises(ValueError, match="allowed domains"): |
| 377 | + await connector.fetch_image_async(bypass_url) |
0 commit comments