Commit 96a982ad authored by OlivierDehaene's avatar OlivierDehaene
Browse files

fix: better warmup error

parent f9910d13
......@@ -670,7 +670,7 @@ class FlashCausalLM(Model):
self.device,
)
_, batch = self.generate_token(batch)
except Exception as e:
except torch.cuda.OutOfMemoryError as e:
raise RuntimeError(
f"Not enough memory to handle {len(batch.input_ids)} prefill tokens. "
f"You need to decrease `--max-batch-prefill-tokens`"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment