Unverified commit 1d6515ef, authored by Huang Long and committed by GitHub
Browse files

[Bugfix] Fix hang bug when using dp attention with HiRadixCache (#7159)


Signed-off-by: huanglong <huanglong@linux.alibaba.com>
parent dea8aa7a
...@@ -558,7 +558,11 @@ class Scheduler( ...@@ -558,7 +558,11 @@ class Scheduler(
self.tree_cache = HiRadixCache( self.tree_cache = HiRadixCache(
req_to_token_pool=self.req_to_token_pool, req_to_token_pool=self.req_to_token_pool,
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator, token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
tp_cache_group=self.tp_cpu_group, tp_cache_group=(
self.attn_tp_cpu_group
if self.server_args.enable_dp_attention
else self.tp_cpu_group
),
page_size=self.page_size, page_size=self.page_size,
hicache_ratio=server_args.hicache_ratio, hicache_ratio=server_args.hicache_ratio,
hicache_size=server_args.hicache_size, hicache_size=server_args.hicache_size,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment