"git@developer.sourcefind.cn:OpenDAS/pytorch3d.git" did not exist on "174738c33e511f11ee7810772e9f80f6c734993a"
Unverified commit 1d24ccb9, authored by Zhuohan Li, committed by GitHub
Browse files

[Fix] Better error message when there is OOM during cache initialization (#203)

parent 14f0b39c
...@@ -127,6 +127,12 @@ class LLMEngine: ...@@ -127,6 +127,12 @@ class LLMEngine:
# FIXME(woosuk): Change to debug log. # FIXME(woosuk): Change to debug log.
logger.info(f'# GPU blocks: {num_gpu_blocks}, ' logger.info(f'# GPU blocks: {num_gpu_blocks}, '
f'# CPU blocks: {num_cpu_blocks}') f'# CPU blocks: {num_cpu_blocks}')
if num_gpu_blocks <= 0 or num_cpu_blocks <= 0:
raise ValueError("No available memory for the cache blocks. "
"Try increasing `gpu_memory_utilization` when "
"initializing the engine.")
self.cache_config.num_gpu_blocks = num_gpu_blocks self.cache_config.num_gpu_blocks = num_gpu_blocks
self.cache_config.num_cpu_blocks = num_cpu_blocks self.cache_config.num_cpu_blocks = num_cpu_blocks
......
...@@ -53,6 +53,7 @@ class RequestOutput: ...@@ -53,6 +53,7 @@ class RequestOutput:
prompt: The prompt string of the request. prompt: The prompt string of the request.
prompt_token_ids: The token IDs of the prompt. prompt_token_ids: The token IDs of the prompt.
outputs: The output sequences of the request. outputs: The output sequences of the request.
finished: Whether the whole request is finished.
""" """
def __init__( def __init__(
self, self,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment