Commit eb38edbc authored by zhuwenwen
Browse files

Skip the is_quantized_kv_cache check in TritonMLAImpl (comment out the NotImplementedError raised for FP8 KV cache)

parent 10bca78b
@@ -65,9 +65,9 @@ class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]):
                                       "are not implemented for "
                                       "TritonMLAImpl")
 
-        if is_quantized_kv_cache(self.kv_cache_dtype):
-            raise NotImplementedError(
-                "TritonMLA V1 with FP8 KV cache not yet supported")
+        # if is_quantized_kv_cache(self.kv_cache_dtype):
+        #     raise NotImplementedError(
+        #         "TritonMLA V1 with FP8 KV cache not yet supported")
 
         self.use_triton_flash_attn = envs.VLLM_USE_TRITON_FLASH_ATTN
         self.triton_fa_func = triton_attention if HAS_TRITON else None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment