Unverified commit ed94e4f4, authored by tomeras91 and committed by GitHub
Browse files

[Bugfix][Model] Jamba assertions and no chunked prefill by default for Jamba (#6784)

parent 3c301239
......@@ -754,10 +754,14 @@ class EngineArgs:
use_sliding_window = (model_config.get_sliding_window()
is not None)
use_spec_decode = self.speculative_model is not None
has_seqlen_agnostic_layers = (
model_config.contains_seqlen_agnostic_layers(
parallel_config))
if (is_gpu and not use_sliding_window and not use_spec_decode
and not self.enable_lora
and not self.enable_prompt_adapter
and not self.enable_prefix_caching):
and not self.enable_prefix_caching
and not has_seqlen_agnostic_layers):
self.enable_chunked_prefill = True
logger.warning(
"Chunked prefill is enabled by default for models with "
......
......@@ -644,6 +644,11 @@ class JambaForCausalLM(nn.Module, HasInnerState):
lora_config: Optional[LoRAConfig] = None,
scheduler_config: Optional[SchedulerConfig] = None,
) -> None:
assert not scheduler_config.chunked_prefill_enabled, \
"Jamba currently does not support chunked prefill"
assert not cache_config.enable_prefix_caching, \
"Jamba currently does not support prefix caching"
super().__init__()
self.config = config
self.scheduler_config = scheduler_config
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment