Commit 442abc67 authored by xiabo's avatar xiabo
Browse files

支持 kvcache fp8_e4m3/fp8_e5m2

支持 kvcache fp8_e4m3/fp8_e5m2 的 RMS_ROPE_CONCAT
parent 0e5a20b3
......@@ -2190,7 +2190,7 @@ class MLACommonImpl(MLACommonBaseImpl[M], Generic[M]):
prefill_k_pe = k_pe[num_decode_tokens:]
kv_cache_dtype_str: str | None = None
# if use_fused_rms_rope_concat:
if not envs.VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT:
decode_q = q[:num_decode_tokens]
prefill_q = q[num_decode_tokens:]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment