[ROCm][CI] Fix HuggingFace flash_attention_2 accuracy issue in Isaac vision encoder (#32233)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>

[ROCm][CI] Fix HuggingFace flash_attention_2 accuracy issue in Isaac vision encoder (#32233)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
5e714f7f · Andreas Karatzas · GitHub · 11b6af52 · 5e714f7f · 5e714f7f
Unverified Commit 5e714f7f authored Jan 13, 2026 by Andreas Karatzas Committed by GitHub Jan 12, 2026
Showing with 27 additions and 0 deletions

tests/models/multimodal/conftest.py tests/models/multimodal/conftest.py +19 -0

tests/models/multimodal/generation/vlm_utils/model_utils.py tests/models/multimodal/generation/vlm_utils/model_utils.py +8 -0

No files found.
--- a/tests/models/multimodal/conftest.py
+++ b/tests/models/multimodal/conftest.py
@@ -30,3 +30,22 @@ def pytest_collection_modifyitems(config, items):
        UserWarning,
        stacklevel=1,
    )
+def patch_hf_vision_attn_for_rocm(model):
+    """Force SDPA for HF vision encoders on ROCm.
+
+    HF's flash_attention_2 has accuracy issues on ROCm that bypass
+    torch.backends.cuda settings. This forces SDPA which then uses
+    math_sdp via the pytest_collection_modifyitems settings.
+
+    The model is modified in place and nothing is returned. The call is a
+    no-op when the current platform is not ROCm, or when the (unwrapped)
+    model has no ``vision_embedding`` attribute.
+
+    Args:
+        model: HF model object. If it exposes a ``model`` attribute, that
+            inner object is the one inspected and patched.
+    """
+    if not current_platform.is_rocm():
+        return
+    inner = getattr(model, "model", model)  # unwrap `.model` when present
+    if hasattr(inner, "vision_embedding"):
+        vit = inner.vision_embedding[0]  # presumably the ViT module -- TODO confirm
+        for layer in vit.encoder.layers:
+            if hasattr(layer, "self_attn"):  # layers without self_attn are skipped
+                layer.self_attn.vision_config._attn_implementation = "sdpa"
--- a/tests/models/multimodal/generation/vlm_utils/model_utils.py
+++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py
@@ -576,6 +576,14 @@ def isaac_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
    # ----------------------------
    isaac_model = hf_model.model.model
+    # [ROCm] Force the HF vision encoder to use SDPA attention on ROCm to
+    # avoid HF Transformers flash_attention_2 accuracy issues:
+    # https://github.com/vllm-project/vllm/issues/30167
+    # TODO: Remove once ROCm SDP accuracy issues are resolved on HuggingFace
+    # ----------------------------
+    from ...conftest import patch_hf_vision_attn_for_rocm
+    patch_hf_vision_attn_for_rocm(hf_model.model)
    def patched_forward(
        self,
        input_ids=None,