Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
31060b27
Unverified
Commit
31060b27
authored
Mar 16, 2025
by
Woosuk Kwon
Committed by
GitHub
Mar 16, 2025
Browse files
[V1][BugFix] Detect interleaved sliding window attention (#14896)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent
fc1f6771
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
2 deletions
+9
-2
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+9
-2
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
31060b27
...
...
@@ -82,8 +82,15 @@ class GPUModelRunner(LoRAModelRunnerMixin):
             self.kv_cache_dtype = STR_DTYPE_TO_TORCH_DTYPE[
                 cache_config.cache_dtype]

-        self.is_multimodal_model = model_config.is_multimodal_model
+        # NOTE(woosuk): sliding_window is None for models with interleaved
+        # attention. Use interleaved_sliding_window instead.
         self.sliding_window = model_config.get_sliding_window()
+        self.interleaved_sliding_window = getattr(
+            model_config.hf_text_config, "interleaved_sliding_window", None)
+        self.window_size = (self.sliding_window
+                            or self.interleaved_sliding_window)
+
+        self.is_multimodal_model = model_config.is_multimodal_model
         self.block_size = cache_config.block_size
         self.max_model_len = model_config.max_model_len
         self.max_num_blocks_per_req = cdiv(self.max_model_len, self.block_size)
...
...
@@ -674,7 +681,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
             num_query_heads=self.num_query_heads,
             num_kv_heads=self.num_kv_heads,
             use_alibi=False,  # FIXME
-            use_sliding_window=self.sliding_window is not None,
+            use_sliding_window=self.window_size is not None,
             num_sms=self.num_sms,
         )
         return common_prefix_len if use_cascade else 0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment