Unverified commit e5ac6a41, authored by Tyler Michael Smith and committed by GitHub
Browse files

[Bugfix] Fix divide by zero when serving Mamba models (#9617)


Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
parent dbdd3b5e
...@@ -1612,7 +1612,7 @@ class LLMEngine: ...@@ -1612,7 +1612,7 @@ class LLMEngine:
# KV Cache Usage in % # KV Cache Usage in %
num_total_gpu = self.cache_config.num_gpu_blocks num_total_gpu = self.cache_config.num_gpu_blocks
gpu_cache_usage_sys = 0. gpu_cache_usage_sys = 0.
if num_total_gpu is not None: if num_total_gpu: # Guard against both None and 0
num_free_gpu = sum( num_free_gpu = sum(
scheduler.block_manager.get_num_free_gpu_blocks() scheduler.block_manager.get_num_free_gpu_blocks()
for scheduler in self.scheduler) for scheduler in self.scheduler)
...@@ -1620,7 +1620,7 @@ class LLMEngine: ...@@ -1620,7 +1620,7 @@ class LLMEngine:
num_total_cpu = self.cache_config.num_cpu_blocks num_total_cpu = self.cache_config.num_cpu_blocks
cpu_cache_usage_sys = 0. cpu_cache_usage_sys = 0.
if num_total_cpu is not None and num_total_cpu > 0: if num_total_cpu: # Guard against both None and 0
num_free_cpu = sum( num_free_cpu = sum(
scheduler.block_manager.get_num_free_cpu_blocks() scheduler.block_manager.get_num_free_cpu_blocks()
for scheduler in self.scheduler) for scheduler in self.scheduler)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment