Commit a3f4b5b8 authored by zhuwenwen
Browse files

update weights_not_loaded and flash_mla_with_kvcache

parent d89f7579
......@@ -272,6 +272,7 @@ class DefaultModelLoader(BaseModelLoader):
# that have loaded weights tracking currently.
if model_config.quantization is None and loaded_weights is not None:
    weights_not_loaded = weights_to_load - loaded_weights
    # Drop the indexer's weights_proj bias from the missing set so that its
    # absence does not trigger the error below.
    # NOTE(review): presumably this bias is not stored in the checkpoints
    # being loaded -- confirm against the model definition.
    weights_not_loaded = {
        name for name in weights_not_loaded
        if not name.endswith("indexer.weights_proj.bias")
    }
    if weights_not_loaded:
        raise ValueError("Following weights were not initialized from "
                         f"checkpoint: {weights_not_loaded}")
......@@ -210,6 +210,8 @@ class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]):
causal=True,
descale_q=layer._q_scale.reshape(1),
descale_k=layer._k_scale.reshape(1),
is_fp8_kvcache=False,
indices= None,
)
return o, lse
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment