[V0 Deprecation] Remove unused swap_space parameter (#36216)

Signed-off-by: majiayu000 <1835304752@qq.com>
Co-authored-by: mcelrath

[V0 Deprecation] Remove unused swap_space parameter (#36216)
Signed-off-by: majiayu000 <1835304752@qq.com>
Co-authored-by: mcelrath
00b814ba · lif · GitHub · ee8a2951 · 00b814ba · 00b814ba
Unverified Commit 00b814ba authored Mar 07, 2026 by lif Committed by GitHub Mar 07, 2026
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 11 additions and 11 deletions

vllm/engine/arg_utils.py vllm/engine/arg_utils.py +0 -3

vllm/entrypoints/llm.py vllm/entrypoints/llm.py +11 -8

No files found.
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -447,7 +447,6 @@ class EngineArgs:
    )
    disable_sliding_window: bool = ModelConfig.disable_sliding_window
    disable_cascade_attn: bool = ModelConfig.disable_cascade_attn
-    swap_space: float = CacheConfig.swap_space
    offload_backend: str = OffloadConfig.offload_backend
    cpu_offload_gb: float = UVAOffloadConfig.cpu_offload_gb
    cpu_offload_params: set[str] = get_field(UVAOffloadConfig, "cpu_offload_params")
@@ -961,7 +960,6 @@ class EngineArgs:
        cache_group.add_argument(
            "--kv-cache-memory-bytes", **cache_kwargs["kv_cache_memory_bytes"]
        )
-        cache_group.add_argument("--swap-space", **cache_kwargs["swap_space"])
        cache_group.add_argument("--kv-cache-dtype", **cache_kwargs["cache_dtype"])
        cache_group.add_argument(
            "--num-gpu-blocks-override", **cache_kwargs["num_gpu_blocks_override"]
@@ -1526,7 +1524,6 @@ class EngineArgs:
            block_size=self.block_size,
            gpu_memory_utilization=self.gpu_memory_utilization,
            kv_cache_memory_bytes=self.kv_cache_memory_bytes,
-            swap_space=self.swap_space,
            cache_dtype=resolved_cache_dtype,  # type: ignore[arg-type]
            is_attention_free=model_config.is_attention_free,
            num_gpu_blocks_override=self.num_gpu_blocks_override,

--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -164,12 +164,6 @@ class LLM:
            compared with using gpu_memory_utilization. Note that
            kv_cache_memory_bytes (when not-None) ignores
            gpu_memory_utilization
-        swap_space: The size (GiB) of CPU memory per GPU to use as swap space.
-            This can be used for temporarily storing the states of the requests
-            when their `best_of` sampling parameters are larger than 1. If all
-            requests will have `best_of=1`, you can safely set this to 0.
-            Noting that `best_of` is only supported in V0. Otherwise, too small
-            values may cause out-of-memory (OOM) errors.
        cpu_offload_gb: The size (GiB) of CPU memory to use for offloading
            the model weights. This virtually increases the GPU memory space
            you can use to hold the model weights, at the cost of CPU-GPU data
@@ -240,7 +234,6 @@ class LLM:
        chat_template: Path | str | None = None,
        seed: int = 0,
        gpu_memory_utilization: float = 0.9,
-        swap_space: float = 4,
        cpu_offload_gb: float = 0,
        offload_group_size: int = 0,
        offload_num_in_group: int = 1,
@@ -265,6 +258,17 @@ class LLM:
    ) -> None:
        """LLM constructor."""

+        if "swap_space" in kwargs:
+            kwargs.pop("swap_space")
+            import warnings
+
+            warnings.warn(
+                "The 'swap_space' parameter is deprecated and ignored. "
+                "It will be removed in a future version.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
        if "disable_log_stats" not in kwargs:
            kwargs["disable_log_stats"] = True

@@ -353,7 +357,6 @@ class LLM:
            seed=seed,
            gpu_memory_utilization=gpu_memory_utilization,
            kv_cache_memory_bytes=kv_cache_memory_bytes,
-            swap_space=swap_space,
            cpu_offload_gb=cpu_offload_gb,
            offload_group_size=offload_group_size,
            offload_num_in_group=offload_num_in_group,