"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "6211b92273f5935d66a704d77fade25202bad615"
Unverified Commit 159d9e06 authored by Tushar Sharma, committed by GitHub
Browse files

fix: honor DYN_MM_ALLOW_INTERNAL for localhost requests (DYN-2863) (#8535)


Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent ab7a22ae
...@@ -611,7 +611,9 @@ async def init_llm_worker( ...@@ -611,7 +611,9 @@ async def init_llm_worker(
media_decoder.enable_image({"limits": {"max_alloc": 128 * 1024 * 1024}}) media_decoder.enable_image({"limits": {"max_alloc": 128 * 1024 * 1024}})
media_fetcher = MediaFetcher() media_fetcher = MediaFetcher()
media_fetcher.timeout_ms(30000) media_fetcher.timeout_ms(30000)
media_fetcher.allow_direct_port(False) allow_internal = os.getenv("DYN_MM_ALLOW_INTERNAL", "0") == "1"
media_fetcher.allow_direct_ip(allow_internal)
media_fetcher.allow_direct_port(allow_internal)
# Register the model with runtime config # Register the model with runtime config
# Encode workers do NOT register - they're internal workers only # Encode workers do NOT register - they're internal workers only
......
...@@ -685,7 +685,9 @@ async def register_vllm_model( ...@@ -685,7 +685,9 @@ async def register_vllm_model(
media_fetcher = MediaFetcher() media_fetcher = MediaFetcher()
media_fetcher.timeout_ms(30000) media_fetcher.timeout_ms(30000)
media_fetcher.allow_direct_port(False) allow_internal = os.getenv("DYN_MM_ALLOW_INTERNAL", "0") == "1"
media_fetcher.allow_direct_ip(allow_internal)
media_fetcher.allow_direct_port(allow_internal)
await register_model( await register_model(
model_input, model_input,
......
...@@ -60,7 +60,7 @@ Two environment variables loosen the defaults for non-public deployments: ...@@ -60,7 +60,7 @@ Two environment variables loosen the defaults for non-public deployments:
| Variable | Default | Effect | | Variable | Default | Effect |
|----------|---------|--------| |----------|---------|--------|
| `DYN_MM_ALLOW_INTERNAL` | `0` | Set to `1` to allow `http://` and private / internal IP targets. Intended for on-prem or local-dev setups where media lives on an internal network. | | `DYN_MM_ALLOW_INTERNAL` | `0` | Set to `1` to allow `http://`, private / internal IPs, and explicit ports. Intended for on-prem or local-dev setups where media lives on an internal network. |
| `DYN_MM_LOCAL_PATH` | *(empty)* | Absolute directory prefix. When set, `file://` URIs and bare paths are allowed if they resolve inside this prefix. | | `DYN_MM_LOCAL_PATH` | *(empty)* | Absolute directory prefix. When set, `file://` URIs and bare paths are allowed if they resolve inside this prefix. |
<Warning> <Warning>
......
...@@ -371,7 +371,7 @@ vllm_configs = { ...@@ -371,7 +371,7 @@ vllm_configs = {
name="multimodal_agg_frontend_decoding", name="multimodal_agg_frontend_decoding",
directory=vllm_dir, directory=vllm_dir,
script_name="agg_multimodal.sh", script_name="agg_multimodal.sh",
# post_merge because needs real NIXL not stub # TODO(DYN-2863): revert to post_merge once pre-merge validates the fix.
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.profiled_vram_gib(9.6), # actual profiled peak with kv-bytes pytest.mark.profiled_vram_gib(9.6), # actual profiled peak with kv-bytes
...@@ -379,10 +379,10 @@ vllm_configs = { ...@@ -379,10 +379,10 @@ vllm_configs = {
1_710_490_000 1_710_490_000
), # KV cache cap (2x safety over min=855_244_800) ), # KV cache cap (2x safety over min=855_244_800)
pytest.mark.timeout(220), # ~5x observed 43.7s; 2B model loads slower on CI pytest.mark.timeout(220), # ~5x observed 43.7s; 2B model loads slower on CI
pytest.mark.post_merge, pytest.mark.pre_merge,
], ],
model="Qwen/Qwen2-VL-2B-Instruct", model="Qwen/Qwen2-VL-2B-Instruct",
# Pass --frontend-decoding to enable Rust frontend image decoding + NIXL RDMA transfer env={"DYN_MM_ALLOW_INTERNAL": "1"},
script_args=[ script_args=[
"--model", "--model",
"Qwen/Qwen2-VL-2B-Instruct", "Qwen/Qwen2-VL-2B-Instruct",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment