Unverified Commit 2cc92bfa authored by GuanLuo's avatar GuanLuo Committed by GitHub
Browse files

fix: restrict dummy embedding value range for bypassing vLLM check in E/P/D (#7117)


Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com>
parent a274ef82
...@@ -272,7 +272,15 @@ def construct_qwen_decode_mm_data( ...@@ -272,7 +272,15 @@ def construct_qwen_decode_mm_data(
# This prevents prefix cache from incorrectly matching different images # This prevents prefix cache from incorrectly matching different images
# that happen to have the same dimensions (same image_grid_thw). # that happen to have the same dimensions (same image_grid_thw).
# bit ops to convert request ID to somewhat unique value that fits in the dtype range # bit ops to convert request ID to somewhat unique value that fits in the dtype range
fill_value = hash(request_id) & ((1 << (dtype.itemsize * 8)) - 1) if not hasattr(construct_qwen_decode_mm_data, "_counter"):
construct_qwen_decode_mm_data._counter = 0
fill_value = construct_qwen_decode_mm_data._counter
construct_qwen_decode_mm_data._counter += 1
max_val = (
torch.finfo(dtype).max if dtype.is_floating_point else torch.iinfo(dtype).max
)
if construct_qwen_decode_mm_data._counter > max_val:
construct_qwen_decode_mm_data._counter = 0
image_embeds = torch.full( image_embeds = torch.full(
embeddings_shape, fill_value=fill_value, dtype=dtype, device="cpu" embeddings_shape, fill_value=fill_value, dtype=dtype, device="cpu"
) )
......
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Unit tests for DynamoMultimodalEmbeddingCacheConnector."""
import pytest
import torch
from dynamo.vllm.multimodal_utils.model import construct_qwen_decode_mm_data
# Module-level pytest markers applied to every test in this file.
# Built from marker names so the list of names is the single thing to edit;
# the resulting marks and their order are: pre_merge, vllm, gpu_0, multimodal.
_MARKER_NAMES = ("pre_merge", "vllm", "gpu_0", "multimodal")
pytestmark = [getattr(pytest.mark, marker_name) for marker_name in _MARKER_NAMES]
class TestMultiModalUtils:
    """Tests for the ``construct_qwen_decode_mm_data`` multimodal helper."""

    def test_construct_qwen_decode_mm_data(self):
        """Call the helper many times and validate the structure it returns.

        The helper is invoked ``int(torch.finfo(torch.float16).max) + 2``
        times with a distinct ``request_id`` per round. Every call must
        complete without raising and must return a mapping with an
        ``"image"`` entry holding ``"image_grid_thw"`` and ``"image_embeds"``.

        NOTE(review): the round count presumably exists to drive the helper's
        placeholder fill value past the float16 range at least once; confirm
        against the helper's default dtype.
        """
        total_rounds = 2 + int(torch.finfo(torch.float16).max)
        expected_grid = torch.tensor([16, 16])
        expected_embed_shape = (2, 1024)

        for i in range(total_rounds):
            try:
                result = construct_qwen_decode_mm_data(
                    image_grid_thw=[16, 16],
                    embeddings_shape=[2, 1024],
                    request_id=str(i),
                )
            except Exception as e:
                # Any exception is a test failure; report the offending round.
                pytest.fail(
                    f"construct_qwen_decode_mm_data raised {type(e).__name__} on round {i}: {e}"
                )
            else:
                # Structural checks on the returned multimodal payload.
                assert "image" in result
                image_payload = result["image"]
                assert "image_grid_thw" in image_payload
                assert "image_embeds" in image_payload

                assert torch.allclose(image_payload["image_grid_thw"], expected_grid)

                # The embedding values are placeholders, so only the shape
                # of the tensor is verified here.
                assert image_payload["image_embeds"].shape == expected_embed_shape
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment