Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ed94e4f4
Unverified
Commit
ed94e4f4
authored
Jul 27, 2024
by
tomeras91
Committed by
GitHub
Jul 26, 2024
Browse files
[Bugfix][Model] Jamba assertions and no chunked prefill by default for Jamba (#6784)
parent
3c301239
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
1 deletion
+10
-1
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+5
-1
vllm/model_executor/models/jamba.py
vllm/model_executor/models/jamba.py
+5
-0
No files found.
vllm/engine/arg_utils.py
View file @
ed94e4f4
...
@@ -754,10 +754,14 @@ class EngineArgs:
...
@@ -754,10 +754,14 @@ class EngineArgs:
use_sliding_window
=
(
model_config
.
get_sliding_window
()
use_sliding_window
=
(
model_config
.
get_sliding_window
()
is
not
None
)
is
not
None
)
use_spec_decode
=
self
.
speculative_model
is
not
None
use_spec_decode
=
self
.
speculative_model
is
not
None
has_seqlen_agnostic_layers
=
(
model_config
.
contains_seqlen_agnostic_layers
(
parallel_config
))
if
(
is_gpu
and
not
use_sliding_window
and
not
use_spec_decode
if
(
is_gpu
and
not
use_sliding_window
and
not
use_spec_decode
and
not
self
.
enable_lora
and
not
self
.
enable_lora
and
not
self
.
enable_prompt_adapter
and
not
self
.
enable_prompt_adapter
and
not
self
.
enable_prefix_caching
):
and
not
self
.
enable_prefix_caching
and
not
has_seqlen_agnostic_layers
):
self
.
enable_chunked_prefill
=
True
self
.
enable_chunked_prefill
=
True
logger
.
warning
(
logger
.
warning
(
"Chunked prefill is enabled by default for models with "
"Chunked prefill is enabled by default for models with "
...
...
vllm/model_executor/models/jamba.py
View file @
ed94e4f4
...
@@ -644,6 +644,11 @@ class JambaForCausalLM(nn.Module, HasInnerState):
...
@@ -644,6 +644,11 @@ class JambaForCausalLM(nn.Module, HasInnerState):
lora_config
:
Optional
[
LoRAConfig
]
=
None
,
lora_config
:
Optional
[
LoRAConfig
]
=
None
,
scheduler_config
:
Optional
[
SchedulerConfig
]
=
None
,
scheduler_config
:
Optional
[
SchedulerConfig
]
=
None
,
)
->
None
:
)
->
None
:
assert
not
scheduler_config
.
chunked_prefill_enabled
,
\
"Jamba currently does not support chunked prefill"
assert
not
cache_config
.
enable_prefix_caching
,
\
"Jamba currently does not support prefix caching"
super
().
__init__
()
super
().
__init__
()
self
.
config
=
config
self
.
config
=
config
self
.
scheduler_config
=
scheduler_config
self
.
scheduler_config
=
scheduler_config
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment