Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
61a45e7a
Unverified
Commit
61a45e7a
authored
May 26, 2025
by
Cyrus Leung
Committed by
GitHub
May 26, 2025
Browse files
[Bugfix] Fix Mistral-format models with sliding window (#18693)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
65523a09
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
4 deletions
+10
-4
vllm/config.py
vllm/config.py
+10
-4
No files found.
vllm/config.py
View file @
61a45e7a
...
@@ -542,8 +542,10 @@ class ModelConfig:
...
@@ -542,8 +542,10 @@ class ModelConfig:
sliding_window
=
getattr
(
self
.
hf_text_config
,
"sliding_window"
,
None
)
sliding_window
=
getattr
(
self
.
hf_text_config
,
"sliding_window"
,
None
)
sliding_window_pattern
=
getattr
(
self
.
hf_text_config
,
sliding_window_pattern
=
getattr
(
self
.
hf_text_config
,
"sliding_window_pattern"
,
None
)
"sliding_window_pattern"
,
None
)
has_interleaved_attention
=
sliding_window_pattern
is
not
None
or
(
isinstance
(
sliding_window
,
list
))
if
not
(
self
.
disable_sliding_window
or
sliding_window_pattern
is
None
)
:
if
not
self
.
disable_sliding_window
and
has_interleaved_attention
:
if
(
backend
:
=
if
(
backend
:
=
envs
.
VLLM_ATTENTION_BACKEND
)
in
(
"XFORMERS"
,
"FLASHINFER"
):
envs
.
VLLM_ATTENTION_BACKEND
)
in
(
"XFORMERS"
,
"FLASHINFER"
):
sliding_window_len_min
=
get_min_sliding_window
(
sliding_window_len_min
=
get_min_sliding_window
(
...
@@ -563,7 +565,10 @@ class ModelConfig:
...
@@ -563,7 +565,10 @@ class ModelConfig:
# only the attention layer itself is aware of the sliding
# only the attention layer itself is aware of the sliding
# window, and use the window size to compute the attention.
# window, and use the window size to compute the attention.
self
.
hf_text_config
.
interleaved_sliding_window
=
sliding_window
self
.
hf_text_config
.
interleaved_sliding_window
=
sliding_window
delattr
(
self
.
hf_text_config
,
"sliding_window"
)
if
hasattr
(
self
.
hf_text_config
,
"sliding_window"
):
delattr
(
self
.
hf_text_config
,
"sliding_window"
)
sliding_window
=
None
sliding_window
=
None
self
.
max_model_len
=
_get_and_verify_max_len
(
self
.
max_model_len
=
_get_and_verify_max_len
(
...
@@ -1041,7 +1046,8 @@ class ModelConfig:
...
@@ -1041,7 +1046,8 @@ class ModelConfig:
if
self
.
use_async_output_proc
:
if
self
.
use_async_output_proc
:
self
.
use_async_output_proc
=
False
self
.
use_async_output_proc
=
False
def
get_hf_config_sliding_window
(
self
)
->
Optional
[
int
]:
def
get_hf_config_sliding_window
(
self
)
->
Union
[
Optional
[
int
],
list
[
Optional
[
int
]]]:
"""Get the sliding window size, or None if disabled."""
"""Get the sliding window size, or None if disabled."""
# Some models, like Qwen2 and Qwen1.5, use `use_sliding_window` in
# Some models, like Qwen2 and Qwen1.5, use `use_sliding_window` in
...
@@ -1052,7 +1058,7 @@ class ModelConfig:
...
@@ -1052,7 +1058,7 @@ class ModelConfig:
return
None
return
None
return
getattr
(
self
.
hf_text_config
,
"sliding_window"
,
None
)
return
getattr
(
self
.
hf_text_config
,
"sliding_window"
,
None
)
def
get_sliding_window
(
self
)
->
Optional
[
int
]:
def
get_sliding_window
(
self
)
->
Optional
[
Union
[
int
,
list
[
Optional
[
int
]]]
]:
"""Get the sliding window size, or None if disabled.
"""Get the sliding window size, or None if disabled.
"""
"""
# If user disables sliding window, return None.
# If user disables sliding window, return None.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment