Suggest using dtype=half when OOM.

cb7a1c1c · Ben · GitHub · 7878958c · cb7a1c1c
Unverified commit cb7a1c1c, authored Jan 13, 2024 by Ben; committed by GitHub on Jan 12, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 1 deletion

vllm/worker/worker.py +3 -1

No files found.
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -239,4 +239,6 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
            raise ValueError(
                "Bfloat16 is only supported on GPUs with compute capability "
                f"of at least 8.0. Your {gpu_name} GPU has compute capability "
-                f"{compute_capability[0]}.{compute_capability[1]}.")
+                f"{compute_capability[0]}.{compute_capability[1]}."
+                f" You can explicitly specify the data type by using the --dtype option, for example: --dtype=half."
+            )