Add comments on swap space (#154)

3f92038b · Woosuk Kwon · GitHub · dcda03b4 · 3f92038b · 3f92038b
Unverified commit 3f92038b, authored Jun 18, 2023 by Woosuk Kwon; committed by GitHub on Jun 18, 2023.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 8 additions and 2 deletions

benchmarks/benchmark_serving.py +2 -1

vllm/core/scheduler.py +6 -1

No files found.
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -3,7 +3,8 @@
 On the server side, run one of the following commands:
    (vLLM backend)
    python -m vllm.entrypoints.api_server \
-        --disable-log-requests --model <your_model>
+        --model <your_model> --swap-space 16 \
+        --disable-log-requests
    (TGI backend)
    ./launch_hf_server.sh <your_model>

--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -409,7 +409,12 @@ class Scheduler:
        seq_group: SequenceGroup,
        blocks_to_swap_out: Dict[int, int],
    ) -> None:
-        assert self.block_manager.can_swap_out(seq_group)
+        if not self.block_manager.can_swap_out(seq_group):
+            # FIXME(woosuk): Abort the sequence group instead of aborting the
+            # entire engine.
+            raise RuntimeError(
+                "Aborted due to the lack of CPU swap space. Please increase "
+                "the swap space to avoid this error.")
        mapping = self.block_manager.swap_out(seq_group)
        blocks_to_swap_out.update(mapping)
        for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING):