fix: honor DYN_MM_ALLOW_INTERNAL for localhost requests (DYN-2863) (#8535)

Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

fix: honor DYN_MM_ALLOW_INTERNAL for localhost requests (DYN-2863) (#8535)
Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
159d9e06 · Tushar Sharma · GitHub · ab7a22ae · 159d9e06 · 159d9e06
Unverified Commit 159d9e06 authored Apr 23, 2026 by Tushar Sharma Committed by GitHub Apr 23, 2026
4 changed files
--- a/components/src/dynamo/trtllm/workers/llm_worker.py
+++ b/components/src/dynamo/trtllm/workers/llm_worker.py
@@ -611,7 +611,9 @@ async def init_llm_worker(
            media_decoder.enable_image({"limits": {"max_alloc": 128 * 1024 * 1024}})
            media_fetcher = MediaFetcher()
            media_fetcher.timeout_ms(30000)
-            media_fetcher.allow_direct_port(False)
+            allow_internal = os.getenv("DYN_MM_ALLOW_INTERNAL", "0") == "1"
+            media_fetcher.allow_direct_ip(allow_internal)
+            media_fetcher.allow_direct_port(allow_internal)
        # Register the model with runtime config
        # Encode workers do NOT register - they're internal workers only

--- a/components/src/dynamo/vllm/main.py
+++ b/components/src/dynamo/vllm/main.py
@@ -685,7 +685,9 @@ async def register_vllm_model(
        media_fetcher = MediaFetcher()
        media_fetcher.timeout_ms(30000)
-        media_fetcher.allow_direct_port(False)
+        allow_internal = os.getenv("DYN_MM_ALLOW_INTERNAL", "0") == "1"
+        media_fetcher.allow_direct_ip(allow_internal)
+        media_fetcher.allow_direct_port(allow_internal)
    await register_model(
        model_input,

--- a/docs/features/multimodal/README.md
+++ b/docs/features/multimodal/README.md
@@ -60,7 +60,7 @@ Two environment variables loosen the defaults for non-public deployments:
 | Variable | Default | Effect |
 |----------|---------|--------|
-| `DYN_MM_ALLOW_INTERNAL` | `0` | Set to `1` to allow `http://` and private / internal IP targets. Intended for on-prem or local-dev setups where media lives on an internal network. |
+| `DYN_MM_ALLOW_INTERNAL` | `0` | Set to `1` to allow `http://`, private / internal IPs, and explicit ports. Intended for on-prem or local-dev setups where media lives on an internal network. |
 | `DYN_MM_LOCAL_PATH` | *(empty)* | Absolute directory prefix. When set, `file://` URIs and bare paths are allowed if they resolve inside this prefix. |
 <Warning>

--- a/tests/serve/test_vllm.py
+++ b/tests/serve/test_vllm.py
@@ -371,7 +371,7 @@ vllm_configs = {
        name="multimodal_agg_frontend_decoding",
        directory=vllm_dir,
        script_name="agg_multimodal.sh",
-        # post_merge because needs real NIXL not stub
+        # TODO(DYN-2863): revert to post_merge once pre-merge validates the fix.
        marks=[
            pytest.mark.gpu_1,
            pytest.mark.profiled_vram_gib(9.6),  # actual profiled peak with kv-bytes
@@ -379,10 +379,10 @@ vllm_configs = {
                1_710_490_000
            ),  # KV cache cap (2x safety over min=855_244_800)
            pytest.mark.timeout(220),  # ~5x observed 43.7s; 2B model loads slower on CI
-            pytest.mark.post_merge,
+            pytest.mark.pre_merge,
        ],
        model="Qwen/Qwen2-VL-2B-Instruct",
-        # Pass --frontend-decoding to enable Rust frontend image decoding + NIXL RDMA transfer
+        env={"DYN_MM_ALLOW_INTERNAL": "1"},
        script_args=[
            "--model",
            "Qwen/Qwen2-VL-2B-Instruct",