Commit eb38edbc authored by zhuwenwen's avatar zhuwenwen
Browse files

skip is_quantized_kv_cache

parent 10bca78b
......@@ -65,9 +65,9 @@ class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]):
"are not implemented for "
"TritonMLAImpl")
if is_quantized_kv_cache(self.kv_cache_dtype):
raise NotImplementedError(
"TritonMLA V1 with FP8 KV cache not yet supported")
# if is_quantized_kv_cache(self.kv_cache_dtype):
# raise NotImplementedError(
# "TritonMLA V1 with FP8 KV cache not yet supported")
self.use_triton_flash_attn = envs.VLLM_USE_TRITON_FLASH_ATTN
self.triton_fa_func = triton_attention if HAS_TRITON else None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment