Commit 96a982ad authored by OlivierDehaene's avatar OlivierDehaene
Browse files

fix: better warmup error

parent f9910d13
...@@ -670,7 +670,7 @@ class FlashCausalLM(Model): ...@@ -670,7 +670,7 @@ class FlashCausalLM(Model):
self.device, self.device,
) )
_, batch = self.generate_token(batch) _, batch = self.generate_token(batch)
except Exception as e: except torch.cuda.OutOfMemoryError as e:
raise RuntimeError( raise RuntimeError(
f"Not enough memory to handle {len(batch.input_ids)} prefill tokens. " f"Not enough memory to handle {len(batch.input_ids)} prefill tokens. "
f"You need to decrease `--max-batch-prefill-tokens`" f"You need to decrease `--max-batch-prefill-tokens`"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment