[Misc] Enable Paligemma's PrefixLM attention mask computation (#31725)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

[Misc] Enable Paligemma's PrefixLM attention mask computation (#31725)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
51e38a8e · Isotr0py · GitHub · d8e38d49 · 51e38a8e · 51e38a8e
Unverified Commit 51e38a8e authored Jan 06, 2026 by Isotr0py Committed by GitHub Jan 06, 2026
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 1 addition and 7 deletions

tests/models/multimodal/generation/test_common.py +0 -4

vllm/config/model.py +1 -3

No files found.
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -121,10 +121,6 @@ VLM_TEST_SETTINGS = {
        ),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
-        dtype="bfloat16",
-        marks=[
-            pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")
-        ],
    ),
    "qwen2_5_vl": VLMTestInfo(
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],

--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -1097,9 +1097,7 @@ class ModelConfig:
        """Whether to use bidirectional attention for mm positions."""
        MM_PREFIX_LM_MODELS = (
            "gemma3",
-            # TODO(Isotr0py): Disable paligemma for now before
-            # we supports soft cap attention for FlexAttention
-            # "paligemma",
+            "paligemma",
        )
        if not hasattr(self.hf_config, "model_type"):
            return False