Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
eb38edbc
Commit
eb38edbc
authored
Oct 03, 2025
by
zhuwenwen
Browse files
skip is_quantized_kv_cache
parent
10bca78b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
3 deletions
+3
-3
vllm/v1/attention/backends/mla/triton_mla.py
vllm/v1/attention/backends/mla/triton_mla.py
+3
-3
No files found.
vllm/v1/attention/backends/mla/triton_mla.py
View file @
eb38edbc
...
@@ -65,9 +65,9 @@ class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]):
...
@@ -65,9 +65,9 @@ class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]):
"are not implemented for "
"are not implemented for "
"TritonMLAImpl"
)
"TritonMLAImpl"
)
if
is_quantized_kv_cache
(
self
.
kv_cache_dtype
):
#
if is_quantized_kv_cache(self.kv_cache_dtype):
raise
NotImplementedError
(
#
raise NotImplementedError(
"TritonMLA V1 with FP8 KV cache not yet supported"
)
#
"TritonMLA V1 with FP8 KV cache not yet supported")
self
.
use_triton_flash_attn
=
envs
.
VLLM_USE_TRITON_FLASH_ATTN
self
.
use_triton_flash_attn
=
envs
.
VLLM_USE_TRITON_FLASH_ATTN
self
.
triton_fa_func
=
triton_attention
if
HAS_TRITON
else
None
self
.
triton_fa_func
=
triton_attention
if
HAS_TRITON
else
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment