[Misc] normalize multiprocessing Queue usage (#22371)

Signed-off-by: Andy Xie <andy.xning@gmail.com>

[Misc] normalize multiprocessing Queue usage (#22371)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
acf8aeb7 · Ning Xie · GitHub · 7e3a8dc9 · acf8aeb7
Unverified commit acf8aeb7, authored Aug 08, 2025 by Ning Xie; committed by GitHub on Aug 08, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 18 additions and 2 deletions

tests/test_sharded_state_loader.py +18 -2

No files found.
--- a/tests/test_sharded_state_loader.py
+++ b/tests/test_sharded_state_loader.py
@@ -118,8 +118,17 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
                            tensor_parallel_size=tp_size,
                        ))
        p.start()
-        p.join()
+        # Call queue.get() before p.join() to prevent deadlock:
+        # a child that has put data on the queue will not exit until that data
+        # is flushed to the underlying pipe. If the pipe is full and the parent
+        # is already blocked in p.join(), nothing drains it and both hang.
+        # See: https://github.com/vllm-project/vllm/pull/22371#discussion_r2257773814
        out_before = queue.get()
+        p.join()
+        queue.close()
+        queue.join_thread()
+        queue = ctx.Queue()
        p = ctx.Process(target=_run_generate,
                        args=(output_dir, queue),
@@ -131,7 +140,14 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
                            load_format="sharded_state",
                        ))
        p.start()
-        p.join()
+        # Call queue.get() before p.join() to prevent deadlock:
+        # a child that has put data on the queue will not exit until that data
+        # is flushed to the underlying pipe. If the pipe is full and the parent
+        # is already blocked in p.join(), nothing drains it and both hang.
+        # See: https://github.com/vllm-project/vllm/pull/22371#discussion_r2257773814
        out_after = queue.get()
+        p.join()
+        queue.close()
+        queue.join_thread()
        assert out_before == out_after