[Bugfix][Model] Jamba assertions and no chunked prefill by default for Jamba (#6784)

ed94e4f4 · tomeras91 · GitHub · 3c301239 · ed94e4f4 · ed94e4f4
Unverified commit ed94e4f4, authored Jul 27, 2024 by tomeras91; committed by GitHub on Jul 26, 2024
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 10 additions and 1 deletion

vllm/engine/arg_utils.py +5 -1

vllm/model_executor/models/jamba.py +5 -0

No files found.
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -754,10 +754,14 @@ class EngineArgs:
                use_sliding_window = (model_config.get_sliding_window()
                                      is not None)
                use_spec_decode = self.speculative_model is not None
+                has_seqlen_agnostic_layers = (
+                    model_config.contains_seqlen_agnostic_layers(
+                        parallel_config))
                if (is_gpu and not use_sliding_window and not use_spec_decode
                        and not self.enable_lora
                        and not self.enable_prompt_adapter
-                        and not self.enable_prefix_caching):
+                        and not self.enable_prefix_caching
+                        and not has_seqlen_agnostic_layers):
                    self.enable_chunked_prefill = True
                    logger.warning(
                        "Chunked prefill is enabled by default for models with "

--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -644,6 +644,11 @@ class JambaForCausalLM(nn.Module, HasInnerState):
        lora_config: Optional[LoRAConfig] = None,
        scheduler_config: Optional[SchedulerConfig] = None,
    ) -> None:
+        assert not scheduler_config.chunked_prefill_enabled, \
+            "Jamba currently does not support chunked prefill"
+        assert not cache_config.enable_prefix_caching, \
+            "Jamba currently does not support prefix caching"
+
        super().__init__()
        self.config = config
        self.scheduler_config = scheduler_config