merge dev主干代码

561b6cbb · 王敏 · 0beafe40 · ce47a56e · 561b6cbb
Commit 561b6cbb authored Apr 10, 2026 by 王敏
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 4 deletions

vllm/v1/worker/gpu_model_runner.py +6 -4

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -829,8 +829,12 @@ class GPUModelRunner(
                non_blocking=True,
            )
            return
+        # self.mrope_positions.gpu[:, :num_tokens].copy_(
+        #     self.mrope_positions.cpu[:, :num_tokens],
+        #     non_blocking=True,
+        # )
        self.mrope_positions.gpu[:, :num_tokens].copy_(
-            self.mrope_positions.cpu[:, :num_tokens],
+            self.mrope_positions.cpu[:, :num_tokens].contiguous().pin_memory(),
            non_blocking=True,
        )
@@ -6286,7 +6290,7 @@ class GPUModelRunner(
        return kv_caches
    def _update_hybrid_attention_mamba_layout(
-        self, kv_caches: dict[str, Any]
+        self, kv_caches: dict[str, torch.Tensor]
    ) -> None:
        """
        Update the layout of attention layers from (2, num_blocks, ...) to
@@ -6300,8 +6304,6 @@ class GPUModelRunner(
            kv_cache_spec = group.kv_cache_spec
            for layer_name in group.layer_names:
                kv_cache = kv_caches[layer_name]
-                if not isinstance(kv_cache, torch.Tensor):
-                    continue
                if isinstance(kv_cache_spec, AttentionSpec) and kv_cache.shape[0] == 2:
                    assert kv_cache.shape[1] != 2, (
                        "Fail to determine whether the layout is "