Unverified Commit 0b7db411 authored by Zhuohan Li, committed by GitHub

[Bug] Fix the OOM condition for CPU cache (#260)

parent 471a7a45
@@ -128,7 +128,7 @@ class LLMEngine:
         logger.info(f'# GPU blocks: {num_gpu_blocks}, '
                     f'# CPU blocks: {num_cpu_blocks}')
-        if num_gpu_blocks <= 0 or num_cpu_blocks <= 0:
+        if num_gpu_blocks <= 0:
             raise ValueError("No available memory for the cache blocks. "
                              "Try increasing `gpu_memory_utilization` when "
                              "initializing the engine.")
@@ -113,6 +113,8 @@ class Worker:
         num_gpu_blocks = int((total_gpu_memory * gpu_memory_utilization
                               - peak_memory) // cache_block_size)
         num_cpu_blocks = int(cpu_swap_space // cache_block_size)
+        num_gpu_blocks = max(num_gpu_blocks, 0)
+        num_cpu_blocks = max(num_cpu_blocks, 0)
         torch.cuda.empty_cache()
         # Reset the seed to ensure that the random state is not affected by
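
For reference, a minimal sketch of the behavior after this commit: the computed block counts are clamped to be non-negative, and the engine only raises when there are no GPU blocks, since a CPU swap space of zero is a valid configuration. The standalone check_cache_blocks helper and the numbers in the usage example below are hypothetical and only mirror the quantities appearing in the diff; this is not the actual engine/worker API.

def check_cache_blocks(total_gpu_memory: float,
                       gpu_memory_utilization: float,
                       peak_memory: float,
                       cache_block_size: int,
                       cpu_swap_space: int) -> tuple[int, int]:
    """Hypothetical helper mirroring the commit's logic (not the real API)."""
    num_gpu_blocks = int((total_gpu_memory * gpu_memory_utilization
                          - peak_memory) // cache_block_size)
    num_cpu_blocks = int(cpu_swap_space // cache_block_size)
    # Clamp to zero so a negative result (e.g. profiling used more memory
    # than the budget allows) does not propagate as a bogus block count.
    num_gpu_blocks = max(num_gpu_blocks, 0)
    num_cpu_blocks = max(num_cpu_blocks, 0)
    # Only the GPU cache is mandatory; zero CPU swap blocks are allowed.
    if num_gpu_blocks <= 0:
        raise ValueError("No available memory for the cache blocks. "
                         "Try increasing `gpu_memory_utilization` when "
                         "initializing the engine.")
    return num_gpu_blocks, num_cpu_blocks

# Usage (hypothetical numbers): 80 GiB GPU, 90% memory budget, 4 GiB peak
# usage during profiling, 16 MiB cache blocks, no CPU swap space requested.
GiB = 1 << 30
MiB = 1 << 20
print(check_cache_blocks(80 * GiB, 0.90, 4 * GiB, 16 * MiB, 0))
# -> (4352, 0): the engine starts; before this fix, num_cpu_blocks == 0
#    was treated as an out-of-memory condition and raised ValueError.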