[Bugfix] Fix divide by zero when serving Mamba models (#9617)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>

[Bugfix] Fix divide by zero when serving Mamba models (#9617)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
e5ac6a41 · Tyler Michael Smith · GitHub · dbdd3b5e · e5ac6a41
Unverified commit e5ac6a41, authored Oct 23, 2024 by Tyler Michael Smith; committed by GitHub on Oct 23, 2024
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

vllm/engine/llm_engine.py +2 -2

No files found.
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1612,7 +1612,7 @@ class LLMEngine:
        # KV Cache Usage in %
        num_total_gpu = self.cache_config.num_gpu_blocks
        gpu_cache_usage_sys = 0.
-        if num_total_gpu is not None:
+        if num_total_gpu:  # Guard against both None and 0
            num_free_gpu = sum(
                scheduler.block_manager.get_num_free_gpu_blocks()
                for scheduler in self.scheduler)
@@ -1620,7 +1620,7 @@ class LLMEngine:

        num_total_cpu = self.cache_config.num_cpu_blocks
        cpu_cache_usage_sys = 0.
-        if num_total_cpu is not None and num_total_cpu > 0:
+        if num_total_cpu:  # Guard against both None and 0
            num_free_cpu = sum(
                scheduler.block_manager.get_num_free_cpu_blocks()
                for scheduler in self.scheduler)