Unverified Commit 97d6c30c authored by WeiQing Chen, committed by GitHub
Browse files

[BugFix] Fix shared storage connector load kv only load attention layer (#21428)


Signed-off-by: David Chen <530634352@qq.com>
parent a40a8506
...@@ -156,8 +156,16 @@ class SharedStorageConnector(KVConnectorBase_V1): ...@@ -156,8 +156,16 @@ class SharedStorageConnector(KVConnectorBase_V1):
logger.info("Inject KV cache of %d tokens to the paged memory", logger.info("Inject KV cache of %d tokens to the paged memory",
len(request.slot_mapping)) len(request.slot_mapping))
for layer_name in forward_context.no_compile_layers: for layer_name in forward_context.no_compile_layers:
attn_layer = forward_context.no_compile_layers[layer_name] layer = forward_context.no_compile_layers[layer_name]
kv_cache_layer = attn_layer.kv_cache[\
# Only process layers that have a kv_cache
# attribute (attention layers). Skip non-attention
# layers such as FusedMoE/MLP.
kv_cache_attr = getattr(layer, 'kv_cache', None)
if kv_cache_attr is None:
continue
kv_cache_layer = kv_cache_attr[ \
forward_context.virtual_engine] forward_context.virtual_engine]
filename = self._generate_filename_debug( filename = self._generate_filename_debug(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment