Commit 561b6cbb authored by 王敏
Browse files

merge dev主干代码

parents 0beafe40 ce47a56e
...@@ -829,8 +829,12 @@ class GPUModelRunner( ...@@ -829,8 +829,12 @@ class GPUModelRunner(
non_blocking=True, non_blocking=True,
) )
return return
# self.mrope_positions.gpu[:, :num_tokens].copy_(
# self.mrope_positions.cpu[:, :num_tokens],
# non_blocking=True,
# )
self.mrope_positions.gpu[:, :num_tokens].copy_( self.mrope_positions.gpu[:, :num_tokens].copy_(
self.mrope_positions.cpu[:, :num_tokens], self.mrope_positions.cpu[:, :num_tokens].contiguous().pin_memory(),
non_blocking=True, non_blocking=True,
) )
...@@ -6286,7 +6290,7 @@ class GPUModelRunner( ...@@ -6286,7 +6290,7 @@ class GPUModelRunner(
return kv_caches return kv_caches
def _update_hybrid_attention_mamba_layout( def _update_hybrid_attention_mamba_layout(
self, kv_caches: dict[str, Any] self, kv_caches: dict[str, torch.Tensor]
) -> None: ) -> None:
""" """
Update the layout of attention layers from (2, num_blocks, ...) to Update the layout of attention layers from (2, num_blocks, ...) to
...@@ -6300,8 +6304,6 @@ class GPUModelRunner( ...@@ -6300,8 +6304,6 @@ class GPUModelRunner(
kv_cache_spec = group.kv_cache_spec kv_cache_spec = group.kv_cache_spec
for layer_name in group.layer_names: for layer_name in group.layer_names:
kv_cache = kv_caches[layer_name] kv_cache = kv_caches[layer_name]
if not isinstance(kv_cache, torch.Tensor):
continue
if isinstance(kv_cache_spec, AttentionSpec) and kv_cache.shape[0] == 2: if isinstance(kv_cache_spec, AttentionSpec) and kv_cache.shape[0] == 2:
assert kv_cache.shape[1] != 2, ( assert kv_cache.shape[1] != 2, (
"Fail to determine whether the layout is " "Fail to determine whether the layout is "
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment