[BugFix] Fix pipeline parallel (#24621)

Signed-off-by: Nick Hill <nhill@redhat.com>

[BugFix] Fix pipeline parallel (#24621)
Signed-off-by: Nick Hill <nhill@redhat.com>
e2d8c27f · Nick Hill · GitHub · 29799dda · e2d8c27f · e2d8c27f
Unverified commit e2d8c27f, authored Sep 10, 2025 by Nick Hill; committed by GitHub on Sep 10, 2025.
3 changed files
--- a/vllm/executor/uniproc_executor.py
+++ b/vllm/executor/uniproc_executor.py
@@ -71,6 +71,10 @@ class UniProcExecutor(ExecutorBase):
            self.shutdown()
        return

+    def shutdown(self) -> None:
+        if worker := self.driver_worker:
+            worker.shutdown()
+

 UniProcExecutorAsync = UniProcExecutor


--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2070,7 +2070,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
            sampler_output = self._sample(logits, spec_decode_metadata)

        with record_function_or_nullcontext("Bookkeep"):
-            assert isinstance(hidden_states, torch.Tensor)
            (
                num_nans_in_logits,
                logprobs_lists,

--- a/vllm/v1/worker/kv_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py
@@ -45,7 +45,8 @@ class KVConnectorModelRunnerMixin:

    @staticmethod
    def ensure_kv_transfer_shutdown() -> None:
-        if has_kv_transfer_group():
+        # has_kv_transfer_group can be None during interpreter shutdown.
+        if has_kv_transfer_group and has_kv_transfer_group():
            ensure_kv_transfer_shutdown()

    @staticmethod