Skip the is_quantized_kv_cache check in TritonMLAImpl

eb38edbc · zhuwenwen · 10bca78b · eb38edbc
Commit eb38edbc authored Oct 03, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

vllm/v1/attention/backends/mla/triton_mla.py vllm/v1/attention/backends/mla/triton_mla.py +3 -3

No files found.
--- a/vllm/v1/attention/backends/mla/triton_mla.py
+++ b/vllm/v1/attention/backends/mla/triton_mla.py
@@ -65,9 +65,9 @@ class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]):
                                      "are not implemented for "
                                      "TritonMLAImpl")
-        if is_quantized_kv_cache(self.kv_cache_dtype):
+        # if is_quantized_kv_cache(self.kv_cache_dtype):
-            raise NotImplementedError(
+        #     raise NotImplementedError(
-                "TritonMLA V1 with FP8 KV cache not yet supported")
+        #         "TritonMLA V1 with FP8 KV cache not yet supported")
        self.use_triton_flash_attn = envs.VLLM_USE_TRITON_FLASH_ATTN
        self.triton_fa_func = triton_attention if HAS_TRITON else None