Unverified commit bf5f121c, authored by Zhuohan Li and committed by GitHub
Browse files

Reduce GPU memory utilization to make sure OOM doesn't happen (#153)

parent bec7b2dc
...@@ -21,7 +21,7 @@ class EngineArgs: ...@@ -21,7 +21,7 @@ class EngineArgs:
tensor_parallel_size: int = 1 tensor_parallel_size: int = 1
block_size: int = 16 block_size: int = 16
swap_space: int = 4 # GiB swap_space: int = 4 # GiB
gpu_memory_utilization: float = 0.95 gpu_memory_utilization: float = 0.90
max_num_batched_tokens: int = 2560 max_num_batched_tokens: int = 2560
max_num_seqs: int = 256 max_num_seqs: int = 256
disable_log_stats: bool = False disable_log_stats: bool = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment