Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
561b6cbb
Commit
561b6cbb
authored
Apr 10, 2026
by
王敏
Browse files
merge dev主干代码
parents
0beafe40
ce47a56e
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
4 deletions
+6
-4
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+6
-4
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
561b6cbb
...
@@ -829,8 +829,12 @@ class GPUModelRunner(
...
@@ -829,8 +829,12 @@ class GPUModelRunner(
non_blocking
=
True
,
non_blocking
=
True
,
)
)
return
return
# self.mrope_positions.gpu[:, :num_tokens].copy_(
# self.mrope_positions.cpu[:, :num_tokens],
# non_blocking=True,
# )
self
.
mrope_positions
.
gpu
[:,
:
num_tokens
].
copy_
(
self
.
mrope_positions
.
gpu
[:,
:
num_tokens
].
copy_
(
self
.
mrope_positions
.
cpu
[:,
:
num_tokens
],
self
.
mrope_positions
.
cpu
[:,
:
num_tokens
]
.
contiguous
().
pin_memory
()
,
non_blocking
=
True
,
non_blocking
=
True
,
)
)
...
@@ -6286,7 +6290,7 @@ class GPUModelRunner(
...
@@ -6286,7 +6290,7 @@ class GPUModelRunner(
return
kv_caches
return
kv_caches
def
_update_hybrid_attention_mamba_layout
(
def
_update_hybrid_attention_mamba_layout
(
self
,
kv_caches
:
dict
[
str
,
Any
]
self
,
kv_caches
:
dict
[
str
,
torch
.
Tensor
]
)
->
None
:
)
->
None
:
"""
"""
Update the layout of attention layers from (2, num_blocks, ...) to
Update the layout of attention layers from (2, num_blocks, ...) to
...
@@ -6300,8 +6304,6 @@ class GPUModelRunner(
...
@@ -6300,8 +6304,6 @@ class GPUModelRunner(
kv_cache_spec
=
group
.
kv_cache_spec
kv_cache_spec
=
group
.
kv_cache_spec
for
layer_name
in
group
.
layer_names
:
for
layer_name
in
group
.
layer_names
:
kv_cache
=
kv_caches
[
layer_name
]
kv_cache
=
kv_caches
[
layer_name
]
if
not
isinstance
(
kv_cache
,
torch
.
Tensor
):
continue
if
isinstance
(
kv_cache_spec
,
AttentionSpec
)
and
kv_cache
.
shape
[
0
]
==
2
:
if
isinstance
(
kv_cache_spec
,
AttentionSpec
)
and
kv_cache
.
shape
[
0
]
==
2
:
assert
kv_cache
.
shape
[
1
]
!=
2
,
(
assert
kv_cache
.
shape
[
1
]
!=
2
,
(
"Fail to determine whether the layout is "
"Fail to determine whether the layout is "
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment